1 2 #include <../src/mat/impls/baij/mpi/mpibaij.h> /*I "petscmat.h" I*/ 3 #include <../src/mat/impls/sbaij/mpi/mpisbaij.h> 4 #include <../src/mat/impls/sbaij/seq/sbaij.h> 5 #include <petscblaslapack.h> 6 7 #if defined(PETSC_HAVE_ELEMENTAL) 8 PETSC_INTERN PetscErrorCode MatConvert_MPISBAIJ_Elemental(Mat,MatType,MatReuse,Mat*); 9 #endif 10 11 /* This could be moved to matimpl.h */ 12 static PetscErrorCode MatPreallocateWithMats_Private(Mat B, PetscInt nm, Mat X[], PetscBool symm[], PetscBool fill) 13 { 14 Mat preallocator; 15 PetscInt r,rstart,rend; 16 PetscInt bs,i,m,n,M,N; 17 PetscBool cong = PETSC_TRUE; 18 PetscErrorCode ierr; 19 20 PetscFunctionBegin; 21 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 22 PetscValidLogicalCollectiveInt(B,nm,2); 23 for (i = 0; i < nm; i++) { 24 PetscValidHeaderSpecific(X[i],MAT_CLASSID,3); 25 ierr = PetscLayoutCompare(B->rmap,X[i]->rmap,&cong);CHKERRQ(ierr); 26 if (!cong) SETERRQ(PetscObjectComm((PetscObject)B),PETSC_ERR_SUP,"Not for different layouts"); 27 } 28 PetscValidLogicalCollectiveBool(B,fill,5); 29 ierr = MatGetBlockSize(B,&bs);CHKERRQ(ierr); 30 ierr = MatGetSize(B,&M,&N);CHKERRQ(ierr); 31 ierr = MatGetLocalSize(B,&m,&n);CHKERRQ(ierr); 32 ierr = MatCreate(PetscObjectComm((PetscObject)B),&preallocator);CHKERRQ(ierr); 33 ierr = MatSetType(preallocator,MATPREALLOCATOR);CHKERRQ(ierr); 34 ierr = MatSetBlockSize(preallocator,bs);CHKERRQ(ierr); 35 ierr = MatSetSizes(preallocator,m,n,M,N);CHKERRQ(ierr); 36 ierr = MatSetUp(preallocator);CHKERRQ(ierr); 37 ierr = MatGetOwnershipRange(preallocator,&rstart,&rend);CHKERRQ(ierr); 38 for (r = rstart; r < rend; ++r) { 39 PetscInt ncols; 40 const PetscInt *row; 41 const PetscScalar *vals; 42 43 for (i = 0; i < nm; i++) { 44 ierr = MatGetRow(X[i],r,&ncols,&row,&vals);CHKERRQ(ierr); 45 ierr = MatSetValues(preallocator,1,&r,ncols,row,vals,INSERT_VALUES);CHKERRQ(ierr); 46 if (symm && symm[i]) { 47 ierr = MatSetValues(preallocator,ncols,row,1,&r,vals,INSERT_VALUES);CHKERRQ(ierr); 48 } 49 ierr = MatRestoreRow(X[i],r,&ncols,&row,&vals);CHKERRQ(ierr); 50 } 51 } 52 ierr = MatAssemblyBegin(preallocator,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 53 ierr = MatAssemblyEnd(preallocator,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 54 ierr = MatPreallocatorPreallocate(preallocator,fill,B);CHKERRQ(ierr); 55 ierr = MatDestroy(&preallocator);CHKERRQ(ierr); 56 PetscFunctionReturn(0); 57 } 58 59 PETSC_INTERN PetscErrorCode MatConvert_MPISBAIJ_Basic(Mat A, MatType newtype, MatReuse reuse, Mat *newmat) 60 { 61 Mat B; 62 PetscErrorCode ierr; 63 PetscInt r; 64 65 PetscFunctionBegin; 66 if (reuse != MAT_REUSE_MATRIX) { 67 PetscBool symm = PETSC_TRUE,isdense; 68 PetscInt bs; 69 70 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 71 ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr); 72 ierr = MatSetType(B,newtype);CHKERRQ(ierr); 73 ierr = MatGetBlockSize(A,&bs);CHKERRQ(ierr); 74 ierr = MatSetBlockSize(B,bs);CHKERRQ(ierr); 75 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 76 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 77 ierr = PetscObjectTypeCompareAny((PetscObject)B,&isdense,MATSEQDENSE,MATMPIDENSE,MATSEQDENSECUDA,"");CHKERRQ(ierr); 78 if (!isdense) { 79 ierr = MatGetRowUpperTriangular(A);CHKERRQ(ierr); 80 ierr = MatPreallocateWithMats_Private(B,1,&A,&symm,PETSC_TRUE);CHKERRQ(ierr); 81 ierr = MatRestoreRowUpperTriangular(A);CHKERRQ(ierr); 82 } else { 83 ierr = MatSetUp(B);CHKERRQ(ierr); 84 } 85 } else { 86 B = *newmat; 87 ierr = MatZeroEntries(B);CHKERRQ(ierr); 88 } 89 90 ierr = 
MatGetRowUpperTriangular(A);CHKERRQ(ierr); 91 for (r = A->rmap->rstart; r < A->rmap->rend; r++) { 92 PetscInt ncols; 93 const PetscInt *row; 94 const PetscScalar *vals; 95 96 ierr = MatGetRow(A,r,&ncols,&row,&vals);CHKERRQ(ierr); 97 ierr = MatSetValues(B,1,&r,ncols,row,vals,INSERT_VALUES);CHKERRQ(ierr); 98 #if defined(PETSC_USE_COMPLEX) 99 if (A->hermitian) { 100 PetscInt i; 101 for (i = 0; i < ncols; i++) { 102 ierr = MatSetValue(B,row[i],r,PetscConj(vals[i]),INSERT_VALUES);CHKERRQ(ierr); 103 } 104 } else { 105 ierr = MatSetValues(B,ncols,row,1,&r,vals,INSERT_VALUES);CHKERRQ(ierr); 106 } 107 #else 108 ierr = MatSetValues(B,ncols,row,1,&r,vals,INSERT_VALUES);CHKERRQ(ierr); 109 #endif 110 ierr = MatRestoreRow(A,r,&ncols,&row,&vals);CHKERRQ(ierr); 111 } 112 ierr = MatRestoreRowUpperTriangular(A);CHKERRQ(ierr); 113 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 114 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 115 116 if (reuse == MAT_INPLACE_MATRIX) { 117 ierr = MatHeaderReplace(A,&B);CHKERRQ(ierr); 118 } else { 119 *newmat = B; 120 } 121 PetscFunctionReturn(0); 122 } 123 124 PetscErrorCode MatStoreValues_MPISBAIJ(Mat mat) 125 { 126 Mat_MPISBAIJ *aij = (Mat_MPISBAIJ*)mat->data; 127 PetscErrorCode ierr; 128 129 PetscFunctionBegin; 130 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 131 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 132 PetscFunctionReturn(0); 133 } 134 135 PetscErrorCode MatRetrieveValues_MPISBAIJ(Mat mat) 136 { 137 Mat_MPISBAIJ *aij = (Mat_MPISBAIJ*)mat->data; 138 PetscErrorCode ierr; 139 140 PetscFunctionBegin; 141 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 142 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 143 PetscFunctionReturn(0); 144 } 145 146 #define MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv,orow,ocol) \ 147 { \ 148 brow = row/bs; \ 149 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 150 rmax = aimax[brow]; nrow = ailen[brow]; \ 151 bcol = col/bs; \ 152 ridx = row % bs; cidx = col % bs; \ 153 low = 0; high = nrow; \ 154 while (high-low > 3) { \ 155 t = (low+high)/2; \ 156 if (rp[t] > bcol) high = t; \ 157 else low = t; \ 158 } \ 159 for (_i=low; _i<high; _i++) { \ 160 if (rp[_i] > bcol) break; \ 161 if (rp[_i] == bcol) { \ 162 bap = ap + bs2*_i + bs*cidx + ridx; \ 163 if (addv == ADD_VALUES) *bap += value; \ 164 else *bap = value; \ 165 goto a_noinsert; \ 166 } \ 167 } \ 168 if (a->nonew == 1) goto a_noinsert; \ 169 if (a->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \ 170 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \ 171 N = nrow++ - 1; \ 172 /* shift up all the later entries in this row */ \ 173 ierr = PetscArraymove(rp+_i+1,rp+_i,N-_i+1);CHKERRQ(ierr); \ 174 ierr = PetscArraymove(ap+bs2*(_i+1),ap+bs2*_i,bs2*(N-_i+1));CHKERRQ(ierr); \ 175 ierr = PetscArrayzero(ap+bs2*_i,bs2);CHKERRQ(ierr); \ 176 rp[_i] = bcol; \ 177 ap[bs2*_i + bs*cidx + ridx] = value; \ 178 A->nonzerostate++;\ 179 a_noinsert:; \ 180 ailen[brow] = nrow; \ 181 } 182 183 #define MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv,orow,ocol) \ 184 { \ 185 brow = row/bs; \ 186 rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 187 rmax = bimax[brow]; nrow = bilen[brow]; \ 188 bcol = col/bs; \ 189 ridx = row % bs; cidx = col % bs; \ 190 low = 0; high = nrow; \ 191 while (high-low > 3) { \ 192 t = (low+high)/2; \ 193 if (rp[t] > bcol) high = t; \ 194 else low = t; \ 195 } \ 196 for (_i=low; _i<high; _i++) { \ 197 if (rp[_i] 
> bcol) break; \ 198 if (rp[_i] == bcol) { \ 199 bap = ap + bs2*_i + bs*cidx + ridx; \ 200 if (addv == ADD_VALUES) *bap += value; \ 201 else *bap = value; \ 202 goto b_noinsert; \ 203 } \ 204 } \ 205 if (b->nonew == 1) goto b_noinsert; \ 206 if (b->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \ 207 MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \ 208 N = nrow++ - 1; \ 209 /* shift up all the later entries in this row */ \ 210 ierr = PetscArraymove(rp+_i+1,rp+_i,N-_i+1);CHKERRQ(ierr); \ 211 ierr = PetscArraymove(ap+bs2*(_i+1),ap+bs2*_i,bs2*(N-_i+1));CHKERRQ(ierr); \ 212 ierr = PetscArrayzero(ap+bs2*_i,bs2);CHKERRQ(ierr); \ 213 rp[_i] = bcol; \ 214 ap[bs2*_i + bs*cidx + ridx] = value; \ 215 B->nonzerostate++;\ 216 b_noinsert:; \ 217 bilen[brow] = nrow; \ 218 } 219 220 /* Only add/insert a(i,j) with i<=j (blocks). 221 Any a(i,j) with i>j input by user is ingored or generates an error 222 */ 223 PetscErrorCode MatSetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 224 { 225 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 226 MatScalar value; 227 PetscBool roworiented = baij->roworiented; 228 PetscErrorCode ierr; 229 PetscInt i,j,row,col; 230 PetscInt rstart_orig=mat->rmap->rstart; 231 PetscInt rend_orig =mat->rmap->rend,cstart_orig=mat->cmap->rstart; 232 PetscInt cend_orig =mat->cmap->rend,bs=mat->rmap->bs; 233 234 /* Some Variables required in the macro */ 235 Mat A = baij->A; 236 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)(A)->data; 237 PetscInt *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j; 238 MatScalar *aa =a->a; 239 240 Mat B = baij->B; 241 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(B)->data; 242 PetscInt *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j; 243 MatScalar *ba =b->a; 244 245 PetscInt *rp,ii,nrow,_i,rmax,N,brow,bcol; 246 PetscInt low,high,t,ridx,cidx,bs2=a->bs2; 247 MatScalar *ap,*bap; 248 249 /* for stash */ 250 PetscInt n_loc, *in_loc = NULL; 251 MatScalar *v_loc = NULL; 252 253 PetscFunctionBegin; 254 if (!baij->donotstash) { 255 if (n > baij->n_loc) { 256 ierr = PetscFree(baij->in_loc);CHKERRQ(ierr); 257 ierr = PetscFree(baij->v_loc);CHKERRQ(ierr); 258 ierr = PetscMalloc1(n,&baij->in_loc);CHKERRQ(ierr); 259 ierr = PetscMalloc1(n,&baij->v_loc);CHKERRQ(ierr); 260 261 baij->n_loc = n; 262 } 263 in_loc = baij->in_loc; 264 v_loc = baij->v_loc; 265 } 266 267 for (i=0; i<m; i++) { 268 if (im[i] < 0) continue; 269 if (PetscUnlikelyDebug(im[i] >= mat->rmap->N)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 270 if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */ 271 row = im[i] - rstart_orig; /* local row index */ 272 for (j=0; j<n; j++) { 273 if (im[i]/bs > in[j]/bs) { 274 if (a->ignore_ltriangular) { 275 continue; /* ignore lower triangular blocks */ 276 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. 
Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)"); 277 } 278 if (in[j] >= cstart_orig && in[j] < cend_orig) { /* diag entry (A) */ 279 col = in[j] - cstart_orig; /* local col index */ 280 brow = row/bs; bcol = col/bs; 281 if (brow > bcol) continue; /* ignore lower triangular blocks of A */ 282 if (roworiented) value = v[i*n+j]; 283 else value = v[i+j*m]; 284 MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv,im[i],in[j]); 285 /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 286 } else if (in[j] < 0) continue; 287 else if (PetscUnlikelyDebug(in[j] >= mat->cmap->N)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 288 else { /* off-diag entry (B) */ 289 if (mat->was_assembled) { 290 if (!baij->colmap) { 291 ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 292 } 293 #if defined(PETSC_USE_CTABLE) 294 ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr); 295 col = col - 1; 296 #else 297 col = baij->colmap[in[j]/bs] - 1; 298 #endif 299 if (col < 0 && !((Mat_SeqSBAIJ*)(baij->A->data))->nonew) { 300 ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr); 301 col = in[j]; 302 /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */ 303 B = baij->B; 304 b = (Mat_SeqBAIJ*)(B)->data; 305 bimax= b->imax;bi=b->i;bilen=b->ilen;bj=b->j; 306 ba = b->a; 307 } else col += in[j]%bs; 308 } else col = in[j]; 309 if (roworiented) value = v[i*n+j]; 310 else value = v[i+j*m]; 311 MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv,im[i],in[j]); 312 /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 313 } 314 } 315 } else { /* off processor entry */ 316 if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 317 if (!baij->donotstash) { 318 mat->assembled = PETSC_FALSE; 319 n_loc = 0; 320 for (j=0; j<n; j++) { 321 if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */ 322 in_loc[n_loc] = in[j]; 323 if (roworiented) { 324 v_loc[n_loc] = v[i*n+j]; 325 } else { 326 v_loc[n_loc] = v[j*m+i]; 327 } 328 n_loc++; 329 } 330 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n_loc,in_loc,v_loc,PETSC_FALSE);CHKERRQ(ierr); 331 } 332 } 333 } 334 PetscFunctionReturn(0); 335 } 336 337 PETSC_STATIC_INLINE PetscErrorCode MatSetValuesBlocked_SeqSBAIJ_Inlined(Mat A,PetscInt row,PetscInt col,const PetscScalar v[],InsertMode is,PetscInt orow,PetscInt ocol) 338 { 339 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 340 PetscErrorCode ierr; 341 PetscInt *rp,low,high,t,ii,jj,nrow,i,rmax,N; 342 PetscInt *imax =a->imax,*ai=a->i,*ailen=a->ilen; 343 PetscInt *aj =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs; 344 PetscBool roworiented=a->roworiented; 345 const PetscScalar *value = v; 346 MatScalar *ap,*aa = a->a,*bap; 347 348 PetscFunctionBegin; 349 if (col < row) { 350 if (a->ignore_ltriangular) PetscFunctionReturn(0); /* ignore lower triangular block */ 351 else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. 
Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)"); 352 } 353 rp = aj + ai[row]; 354 ap = aa + bs2*ai[row]; 355 rmax = imax[row]; 356 nrow = ailen[row]; 357 value = v; 358 low = 0; 359 high = nrow; 360 361 while (high-low > 7) { 362 t = (low+high)/2; 363 if (rp[t] > col) high = t; 364 else low = t; 365 } 366 for (i=low; i<high; i++) { 367 if (rp[i] > col) break; 368 if (rp[i] == col) { 369 bap = ap + bs2*i; 370 if (roworiented) { 371 if (is == ADD_VALUES) { 372 for (ii=0; ii<bs; ii++) { 373 for (jj=ii; jj<bs2; jj+=bs) { 374 bap[jj] += *value++; 375 } 376 } 377 } else { 378 for (ii=0; ii<bs; ii++) { 379 for (jj=ii; jj<bs2; jj+=bs) { 380 bap[jj] = *value++; 381 } 382 } 383 } 384 } else { 385 if (is == ADD_VALUES) { 386 for (ii=0; ii<bs; ii++) { 387 for (jj=0; jj<bs; jj++) { 388 *bap++ += *value++; 389 } 390 } 391 } else { 392 for (ii=0; ii<bs; ii++) { 393 for (jj=0; jj<bs; jj++) { 394 *bap++ = *value++; 395 } 396 } 397 } 398 } 399 goto noinsert2; 400 } 401 } 402 if (nonew == 1) goto noinsert2; 403 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new block index nonzero block (%D, %D) in the matrix", orow, ocol); 404 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 405 N = nrow++ - 1; high++; 406 /* shift up all the later entries in this row */ 407 ierr = PetscArraymove(rp+i+1,rp+i,N-i+1);CHKERRQ(ierr); 408 ierr = PetscArraymove(ap+bs2*(i+1),ap+bs2*i,bs2*(N-i+1));CHKERRQ(ierr); 409 rp[i] = col; 410 bap = ap + bs2*i; 411 if (roworiented) { 412 for (ii=0; ii<bs; ii++) { 413 for (jj=ii; jj<bs2; jj+=bs) { 414 bap[jj] = *value++; 415 } 416 } 417 } else { 418 for (ii=0; ii<bs; ii++) { 419 for (jj=0; jj<bs; jj++) { 420 *bap++ = *value++; 421 } 422 } 423 } 424 noinsert2:; 425 ailen[row] = nrow; 426 PetscFunctionReturn(0); 427 } 428 429 /* 430 This routine is exactly duplicated in mpibaij.c 431 */ 432 PETSC_STATIC_INLINE PetscErrorCode MatSetValuesBlocked_SeqBAIJ_Inlined(Mat A,PetscInt row,PetscInt col,const PetscScalar v[],InsertMode is,PetscInt orow,PetscInt ocol) 433 { 434 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 435 PetscInt *rp,low,high,t,ii,jj,nrow,i,rmax,N; 436 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 437 PetscErrorCode ierr; 438 PetscInt *aj =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs; 439 PetscBool roworiented=a->roworiented; 440 const PetscScalar *value = v; 441 MatScalar *ap,*aa = a->a,*bap; 442 443 PetscFunctionBegin; 444 rp = aj + ai[row]; 445 ap = aa + bs2*ai[row]; 446 rmax = imax[row]; 447 nrow = ailen[row]; 448 low = 0; 449 high = nrow; 450 value = v; 451 while (high-low > 7) { 452 t = (low+high)/2; 453 if (rp[t] > col) high = t; 454 else low = t; 455 } 456 for (i=low; i<high; i++) { 457 if (rp[i] > col) break; 458 if (rp[i] == col) { 459 bap = ap + bs2*i; 460 if (roworiented) { 461 if (is == ADD_VALUES) { 462 for (ii=0; ii<bs; ii++) { 463 for (jj=ii; jj<bs2; jj+=bs) { 464 bap[jj] += *value++; 465 } 466 } 467 } else { 468 for (ii=0; ii<bs; ii++) { 469 for (jj=ii; jj<bs2; jj+=bs) { 470 bap[jj] = *value++; 471 } 472 } 473 } 474 } else { 475 if (is == ADD_VALUES) { 476 for (ii=0; ii<bs; ii++,value+=bs) { 477 for (jj=0; jj<bs; jj++) { 478 bap[jj] += value[jj]; 479 } 480 bap += bs; 481 } 482 } else { 483 for (ii=0; ii<bs; ii++,value+=bs) { 484 for (jj=0; jj<bs; jj++) { 485 bap[jj] = value[jj]; 486 } 487 bap += bs; 488 } 489 } 490 } 491 goto noinsert2; 492 } 493 } 494 if (nonew == 1) goto noinsert2; 495 if (nonew == -1) 
SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new global block indexed nonzero block (%D, %D) in the matrix", orow, ocol); 496 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 497 N = nrow++ - 1; high++; 498 /* shift up all the later entries in this row */ 499 ierr = PetscArraymove(rp+i+1,rp+i,N-i+1);CHKERRQ(ierr); 500 ierr = PetscArraymove(ap+bs2*(i+1),ap+bs2*i,bs2*(N-i+1));CHKERRQ(ierr); 501 rp[i] = col; 502 bap = ap + bs2*i; 503 if (roworiented) { 504 for (ii=0; ii<bs; ii++) { 505 for (jj=ii; jj<bs2; jj+=bs) { 506 bap[jj] = *value++; 507 } 508 } 509 } else { 510 for (ii=0; ii<bs; ii++) { 511 for (jj=0; jj<bs; jj++) { 512 *bap++ = *value++; 513 } 514 } 515 } 516 noinsert2:; 517 ailen[row] = nrow; 518 PetscFunctionReturn(0); 519 } 520 521 /* 522 This routine could be optimized by removing the need for the block copy below and passing stride information 523 to the above inline routines; similarly in MatSetValuesBlocked_MPIBAIJ() 524 */ 525 PetscErrorCode MatSetValuesBlocked_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv) 526 { 527 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 528 const MatScalar *value; 529 MatScalar *barray =baij->barray; 530 PetscBool roworiented = baij->roworiented,ignore_ltriangular = ((Mat_SeqSBAIJ*)baij->A->data)->ignore_ltriangular; 531 PetscErrorCode ierr; 532 PetscInt i,j,ii,jj,row,col,rstart=baij->rstartbs; 533 PetscInt rend=baij->rendbs,cstart=baij->cstartbs,stepval; 534 PetscInt cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2; 535 536 PetscFunctionBegin; 537 if (!barray) { 538 ierr = PetscMalloc1(bs2,&barray);CHKERRQ(ierr); 539 baij->barray = barray; 540 } 541 542 if (roworiented) { 543 stepval = (n-1)*bs; 544 } else { 545 stepval = (m-1)*bs; 546 } 547 for (i=0; i<m; i++) { 548 if (im[i] < 0) continue; 549 if (PetscUnlikelyDebug(im[i] >= baij->Mbs)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block indexed row too large %D max %D",im[i],baij->Mbs-1); 550 if (im[i] >= rstart && im[i] < rend) { 551 row = im[i] - rstart; 552 for (j=0; j<n; j++) { 553 if (im[i] > in[j]) { 554 if (ignore_ltriangular) continue; /* ignore lower triangular blocks */ 555 else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. 
Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)"); 556 } 557 /* If NumCol = 1 then a copy is not required */ 558 if ((roworiented) && (n == 1)) { 559 barray = (MatScalar*) v + i*bs2; 560 } else if ((!roworiented) && (m == 1)) { 561 barray = (MatScalar*) v + j*bs2; 562 } else { /* Here a copy is required */ 563 if (roworiented) { 564 value = v + i*(stepval+bs)*bs + j*bs; 565 } else { 566 value = v + j*(stepval+bs)*bs + i*bs; 567 } 568 for (ii=0; ii<bs; ii++,value+=stepval) { 569 for (jj=0; jj<bs; jj++) { 570 *barray++ = *value++; 571 } 572 } 573 barray -=bs2; 574 } 575 576 if (in[j] >= cstart && in[j] < cend) { 577 col = in[j] - cstart; 578 ierr = MatSetValuesBlocked_SeqSBAIJ_Inlined(baij->A,row,col,barray,addv,im[i],in[j]);CHKERRQ(ierr); 579 } else if (in[j] < 0) continue; 580 else if (PetscUnlikelyDebug(in[j] >= baij->Nbs)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block indexed column too large %D max %D",in[j],baij->Nbs-1); 581 else { 582 if (mat->was_assembled) { 583 if (!baij->colmap) { 584 ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 585 } 586 587 #if defined(PETSC_USE_DEBUG) 588 #if defined(PETSC_USE_CTABLE) 589 { PetscInt data; 590 ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr); 591 if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 592 } 593 #else 594 if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 595 #endif 596 #endif 597 #if defined(PETSC_USE_CTABLE) 598 ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr); 599 col = (col - 1)/bs; 600 #else 601 col = (baij->colmap[in[j]] - 1)/bs; 602 #endif 603 if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 604 ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr); 605 col = in[j]; 606 } 607 } else col = in[j]; 608 ierr = MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B,row,col,barray,addv,im[i],in[j]);CHKERRQ(ierr); 609 } 610 } 611 } else { 612 if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process block indexed row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 613 if (!baij->donotstash) { 614 if (roworiented) { 615 ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 616 } else { 617 ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 618 } 619 } 620 } 621 } 622 PetscFunctionReturn(0); 623 } 624 625 PetscErrorCode MatGetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 626 { 627 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 628 PetscErrorCode ierr; 629 PetscInt bs = mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend; 630 PetscInt bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data; 631 632 PetscFunctionBegin; 633 for (i=0; i<m; i++) { 634 if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); */ 635 if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1); 636 if (idxm[i] >= bsrstart && idxm[i] < bsrend) { 637 row = idxm[i] - bsrstart; 638 for (j=0; j<n; j++) { 639 if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column %D",idxn[j]); */ 640 if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too 
large: col %D max %D",idxn[j],mat->cmap->N-1); 641 if (idxn[j] >= bscstart && idxn[j] < bscend) { 642 col = idxn[j] - bscstart; 643 ierr = MatGetValues_SeqSBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 644 } else { 645 if (!baij->colmap) { 646 ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 647 } 648 #if defined(PETSC_USE_CTABLE) 649 ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr); 650 data--; 651 #else 652 data = baij->colmap[idxn[j]/bs]-1; 653 #endif 654 if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0; 655 else { 656 col = data + idxn[j]%bs; 657 ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 658 } 659 } 660 } 661 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 662 } 663 PetscFunctionReturn(0); 664 } 665 666 PetscErrorCode MatNorm_MPISBAIJ(Mat mat,NormType type,PetscReal *norm) 667 { 668 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 669 PetscErrorCode ierr; 670 PetscReal sum[2],*lnorm2; 671 672 PetscFunctionBegin; 673 if (baij->size == 1) { 674 ierr = MatNorm(baij->A,type,norm);CHKERRQ(ierr); 675 } else { 676 if (type == NORM_FROBENIUS) { 677 ierr = PetscMalloc1(2,&lnorm2);CHKERRQ(ierr); 678 ierr = MatNorm(baij->A,type,lnorm2);CHKERRQ(ierr); 679 *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2++; /* squar power of norm(A) */ 680 ierr = MatNorm(baij->B,type,lnorm2);CHKERRQ(ierr); 681 *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2--; /* squar power of norm(B) */ 682 ierr = MPIU_Allreduce(lnorm2,sum,2,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 683 *norm = PetscSqrtReal(sum[0] + 2*sum[1]); 684 ierr = PetscFree(lnorm2);CHKERRQ(ierr); 685 } else if (type == NORM_INFINITY || type == NORM_1) { /* max row/column sum */ 686 Mat_SeqSBAIJ *amat=(Mat_SeqSBAIJ*)baij->A->data; 687 Mat_SeqBAIJ *bmat=(Mat_SeqBAIJ*)baij->B->data; 688 PetscReal *rsum,*rsum2,vabs; 689 PetscInt *jj,*garray=baij->garray,rstart=baij->rstartbs,nz; 690 PetscInt brow,bcol,col,bs=baij->A->rmap->bs,row,grow,gcol,mbs=amat->mbs; 691 MatScalar *v; 692 693 ierr = PetscMalloc2(mat->cmap->N,&rsum,mat->cmap->N,&rsum2);CHKERRQ(ierr); 694 ierr = PetscArrayzero(rsum,mat->cmap->N);CHKERRQ(ierr); 695 /* Amat */ 696 v = amat->a; jj = amat->j; 697 for (brow=0; brow<mbs; brow++) { 698 grow = bs*(rstart + brow); 699 nz = amat->i[brow+1] - amat->i[brow]; 700 for (bcol=0; bcol<nz; bcol++) { 701 gcol = bs*(rstart + *jj); jj++; 702 for (col=0; col<bs; col++) { 703 for (row=0; row<bs; row++) { 704 vabs = PetscAbsScalar(*v); v++; 705 rsum[gcol+col] += vabs; 706 /* non-diagonal block */ 707 if (bcol > 0 && vabs > 0.0) rsum[grow+row] += vabs; 708 } 709 } 710 } 711 ierr = PetscLogFlops(nz*bs*bs);CHKERRQ(ierr); 712 } 713 /* Bmat */ 714 v = bmat->a; jj = bmat->j; 715 for (brow=0; brow<mbs; brow++) { 716 grow = bs*(rstart + brow); 717 nz = bmat->i[brow+1] - bmat->i[brow]; 718 for (bcol=0; bcol<nz; bcol++) { 719 gcol = bs*garray[*jj]; jj++; 720 for (col=0; col<bs; col++) { 721 for (row=0; row<bs; row++) { 722 vabs = PetscAbsScalar(*v); v++; 723 rsum[gcol+col] += vabs; 724 rsum[grow+row] += vabs; 725 } 726 } 727 } 728 ierr = PetscLogFlops(nz*bs*bs);CHKERRQ(ierr); 729 } 730 ierr = MPIU_Allreduce(rsum,rsum2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 731 *norm = 0.0; 732 for (col=0; col<mat->cmap->N; col++) { 733 if (rsum2[col] > *norm) *norm = rsum2[col]; 734 } 735 ierr = PetscFree2(rsum,rsum2);CHKERRQ(ierr); 736 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for this norm yet"); 
737 } 738 PetscFunctionReturn(0); 739 } 740 741 PetscErrorCode MatAssemblyBegin_MPISBAIJ(Mat mat,MatAssemblyType mode) 742 { 743 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 744 PetscErrorCode ierr; 745 PetscInt nstash,reallocs; 746 747 PetscFunctionBegin; 748 if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0); 749 750 ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr); 751 ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr); 752 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 753 ierr = PetscInfo2(mat,"Stash has %D entries,uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 754 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 755 ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 756 PetscFunctionReturn(0); 757 } 758 759 PetscErrorCode MatAssemblyEnd_MPISBAIJ(Mat mat,MatAssemblyType mode) 760 { 761 Mat_MPISBAIJ *baij=(Mat_MPISBAIJ*)mat->data; 762 Mat_SeqSBAIJ *a =(Mat_SeqSBAIJ*)baij->A->data; 763 PetscErrorCode ierr; 764 PetscInt i,j,rstart,ncols,flg,bs2=baij->bs2; 765 PetscInt *row,*col; 766 PetscBool other_disassembled; 767 PetscMPIInt n; 768 PetscBool r1,r2,r3; 769 MatScalar *val; 770 771 /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */ 772 PetscFunctionBegin; 773 if (!baij->donotstash && !mat->nooffprocentries) { 774 while (1) { 775 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 776 if (!flg) break; 777 778 for (i=0; i<n;) { 779 /* Now identify the consecutive vals belonging to the same row */ 780 for (j=i,rstart=row[j]; j<n; j++) { 781 if (row[j] != rstart) break; 782 } 783 if (j < n) ncols = j-i; 784 else ncols = n-i; 785 /* Now assemble all these values with a single function call */ 786 ierr = MatSetValues_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr); 787 i = j; 788 } 789 } 790 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 791 /* Now process the block-stash. Since the values are stashed column-oriented, 792 set the roworiented flag to column oriented, and after MatSetValues() 793 restore the original flags */ 794 r1 = baij->roworiented; 795 r2 = a->roworiented; 796 r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented; 797 798 baij->roworiented = PETSC_FALSE; 799 a->roworiented = PETSC_FALSE; 800 801 ((Mat_SeqBAIJ*)baij->B->data)->roworiented = PETSC_FALSE; /* b->roworinted */ 802 while (1) { 803 ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 804 if (!flg) break; 805 806 for (i=0; i<n;) { 807 /* Now identify the consecutive vals belonging to the same row */ 808 for (j=i,rstart=row[j]; j<n; j++) { 809 if (row[j] != rstart) break; 810 } 811 if (j < n) ncols = j-i; 812 else ncols = n-i; 813 ierr = MatSetValuesBlocked_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,mat->insertmode);CHKERRQ(ierr); 814 i = j; 815 } 816 } 817 ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr); 818 819 baij->roworiented = r1; 820 a->roworiented = r2; 821 822 ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworinted */ 823 } 824 825 ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr); 826 ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr); 827 828 /* determine if any processor has disassembled, if so we must 829 also disassemble ourselfs, in order that we may reassemble. 
*/ 830 /* 831 if nonzero structure of submatrix B cannot change then we know that 832 no processor disassembled thus we can skip this stuff 833 */ 834 if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) { 835 ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 836 if (mat->was_assembled && !other_disassembled) { 837 ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr); 838 } 839 } 840 841 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 842 ierr = MatSetUpMultiply_MPISBAIJ(mat);CHKERRQ(ierr); /* setup Mvctx and sMvctx */ 843 } 844 ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr); 845 ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr); 846 847 ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr); 848 849 baij->rowvalues = 0; 850 851 /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */ 852 if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 853 PetscObjectState state = baij->A->nonzerostate + baij->B->nonzerostate; 854 ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 855 } 856 PetscFunctionReturn(0); 857 } 858 859 extern PetscErrorCode MatSetValues_MPIBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode); 860 #include <petscdraw.h> 861 static PetscErrorCode MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 862 { 863 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 864 PetscErrorCode ierr; 865 PetscInt bs = mat->rmap->bs; 866 PetscMPIInt rank = baij->rank; 867 PetscBool iascii,isdraw; 868 PetscViewer sviewer; 869 PetscViewerFormat format; 870 871 PetscFunctionBegin; 872 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 873 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 874 if (iascii) { 875 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 876 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 877 MatInfo info; 878 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 879 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 880 ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr); 881 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %g\n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,mat->rmap->bs,(double)info.memory);CHKERRQ(ierr); 882 ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 883 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 884 ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 885 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 886 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 887 ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr); 888 ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 889 ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr); 890 PetscFunctionReturn(0); 891 } else if (format == PETSC_VIEWER_ASCII_INFO) { 892 ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr); 893 PetscFunctionReturn(0); 894 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 895 PetscFunctionReturn(0); 896 } 897 
} 898 899 if (isdraw) { 900 PetscDraw draw; 901 PetscBool isnull; 902 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 903 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); 904 if (isnull) PetscFunctionReturn(0); 905 } 906 907 { 908 /* assemble the entire matrix onto first processor. */ 909 Mat A; 910 Mat_SeqSBAIJ *Aloc; 911 Mat_SeqBAIJ *Bloc; 912 PetscInt M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs; 913 MatScalar *a; 914 const char *matname; 915 916 /* Should this be the same type as mat? */ 917 ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr); 918 if (!rank) { 919 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 920 } else { 921 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 922 } 923 ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr); 924 ierr = MatMPISBAIJSetPreallocation(A,mat->rmap->bs,0,NULL,0,NULL);CHKERRQ(ierr); 925 ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 926 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr); 927 928 /* copy over the A part */ 929 Aloc = (Mat_SeqSBAIJ*)baij->A->data; 930 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 931 ierr = PetscMalloc1(bs,&rvals);CHKERRQ(ierr); 932 933 for (i=0; i<mbs; i++) { 934 rvals[0] = bs*(baij->rstartbs + i); 935 for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1; 936 for (j=ai[i]; j<ai[i+1]; j++) { 937 col = (baij->cstartbs+aj[j])*bs; 938 for (k=0; k<bs; k++) { 939 ierr = MatSetValues_MPISBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 940 col++; 941 a += bs; 942 } 943 } 944 } 945 /* copy over the B part */ 946 Bloc = (Mat_SeqBAIJ*)baij->B->data; 947 ai = Bloc->i; aj = Bloc->j; a = Bloc->a; 948 for (i=0; i<mbs; i++) { 949 950 rvals[0] = bs*(baij->rstartbs + i); 951 for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1; 952 for (j=ai[i]; j<ai[i+1]; j++) { 953 col = baij->garray[aj[j]]*bs; 954 for (k=0; k<bs; k++) { 955 ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 956 col++; 957 a += bs; 958 } 959 } 960 } 961 ierr = PetscFree(rvals);CHKERRQ(ierr); 962 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 963 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 964 /* 965 Everyone has to call to draw the matrix since the graphics waits are 966 synchronized across all processors that share the PetscDraw object 967 */ 968 ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr); 969 ierr = PetscObjectGetName((PetscObject)mat,&matname);CHKERRQ(ierr); 970 if (!rank) { 971 ierr = PetscObjectSetName((PetscObject)((Mat_MPISBAIJ*)(A->data))->A,matname);CHKERRQ(ierr); 972 ierr = MatView_SeqSBAIJ(((Mat_MPISBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 973 } 974 ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr); 975 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 976 ierr = MatDestroy(&A);CHKERRQ(ierr); 977 } 978 PetscFunctionReturn(0); 979 } 980 981 /* Used for both MPIBAIJ and MPISBAIJ matrices */ 982 #define MatView_MPISBAIJ_Binary MatView_MPIBAIJ_Binary 983 984 PetscErrorCode MatView_MPISBAIJ(Mat mat,PetscViewer viewer) 985 { 986 PetscErrorCode ierr; 987 PetscBool iascii,isdraw,issocket,isbinary; 988 989 PetscFunctionBegin; 990 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 991 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 992 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 993 ierr = 
PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 994 if (iascii || isdraw || issocket) { 995 ierr = MatView_MPISBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 996 } else if (isbinary) { 997 ierr = MatView_MPISBAIJ_Binary(mat,viewer);CHKERRQ(ierr); 998 } 999 PetscFunctionReturn(0); 1000 } 1001 1002 PetscErrorCode MatDestroy_MPISBAIJ(Mat mat) 1003 { 1004 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 1005 PetscErrorCode ierr; 1006 1007 PetscFunctionBegin; 1008 #if defined(PETSC_USE_LOG) 1009 PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N); 1010 #endif 1011 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 1012 ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr); 1013 ierr = MatDestroy(&baij->A);CHKERRQ(ierr); 1014 ierr = MatDestroy(&baij->B);CHKERRQ(ierr); 1015 #if defined(PETSC_USE_CTABLE) 1016 ierr = PetscTableDestroy(&baij->colmap);CHKERRQ(ierr); 1017 #else 1018 ierr = PetscFree(baij->colmap);CHKERRQ(ierr); 1019 #endif 1020 ierr = PetscFree(baij->garray);CHKERRQ(ierr); 1021 ierr = VecDestroy(&baij->lvec);CHKERRQ(ierr); 1022 ierr = VecScatterDestroy(&baij->Mvctx);CHKERRQ(ierr); 1023 ierr = VecDestroy(&baij->slvec0);CHKERRQ(ierr); 1024 ierr = VecDestroy(&baij->slvec0b);CHKERRQ(ierr); 1025 ierr = VecDestroy(&baij->slvec1);CHKERRQ(ierr); 1026 ierr = VecDestroy(&baij->slvec1a);CHKERRQ(ierr); 1027 ierr = VecDestroy(&baij->slvec1b);CHKERRQ(ierr); 1028 ierr = VecScatterDestroy(&baij->sMvctx);CHKERRQ(ierr); 1029 ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr); 1030 ierr = PetscFree(baij->barray);CHKERRQ(ierr); 1031 ierr = PetscFree(baij->hd);CHKERRQ(ierr); 1032 ierr = VecDestroy(&baij->diag);CHKERRQ(ierr); 1033 ierr = VecDestroy(&baij->bb1);CHKERRQ(ierr); 1034 ierr = VecDestroy(&baij->xx1);CHKERRQ(ierr); 1035 #if defined(PETSC_USE_REAL_MAT_SINGLE) 1036 ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr); 1037 #endif 1038 ierr = PetscFree(baij->in_loc);CHKERRQ(ierr); 1039 ierr = PetscFree(baij->v_loc);CHKERRQ(ierr); 1040 ierr = PetscFree(baij->rangebs);CHKERRQ(ierr); 1041 ierr = PetscFree(mat->data);CHKERRQ(ierr); 1042 1043 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 1044 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr); 1045 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr); 1046 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPISBAIJSetPreallocation_C",NULL);CHKERRQ(ierr); 1047 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPISBAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr); 1048 #if defined(PETSC_HAVE_ELEMENTAL) 1049 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpisbaij_elemental_C",NULL);CHKERRQ(ierr); 1050 #endif 1051 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpisbaij_mpiaij_C",NULL);CHKERRQ(ierr); 1052 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpisbaij_mpibaij_C",NULL);CHKERRQ(ierr); 1053 PetscFunctionReturn(0); 1054 } 1055 1056 PetscErrorCode MatMult_MPISBAIJ_Hermitian(Mat A,Vec xx,Vec yy) 1057 { 1058 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1059 PetscErrorCode ierr; 1060 PetscInt mbs=a->mbs,bs=A->rmap->bs; 1061 PetscScalar *from; 1062 const PetscScalar *x; 1063 1064 PetscFunctionBegin; 1065 /* diagonal part */ 1066 ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr); 1067 ierr = VecSet(a->slvec1b,0.0);CHKERRQ(ierr); 1068 1069 /* subdiagonal part */ 1070 ierr = 
(*a->B->ops->multhermitiantranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr); 1071 1072 /* copy x into the vec slvec0 */ 1073 ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr); 1074 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1075 1076 ierr = PetscArraycpy(from,x,bs*mbs);CHKERRQ(ierr); 1077 ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr); 1078 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1079 1080 ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1081 ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1082 /* supperdiagonal part */ 1083 ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr); 1084 PetscFunctionReturn(0); 1085 } 1086 1087 PetscErrorCode MatMult_MPISBAIJ(Mat A,Vec xx,Vec yy) 1088 { 1089 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1090 PetscErrorCode ierr; 1091 PetscInt mbs=a->mbs,bs=A->rmap->bs; 1092 PetscScalar *from; 1093 const PetscScalar *x; 1094 1095 PetscFunctionBegin; 1096 /* diagonal part */ 1097 ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr); 1098 ierr = VecSet(a->slvec1b,0.0);CHKERRQ(ierr); 1099 1100 /* subdiagonal part */ 1101 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr); 1102 1103 /* copy x into the vec slvec0 */ 1104 ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr); 1105 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1106 1107 ierr = PetscArraycpy(from,x,bs*mbs);CHKERRQ(ierr); 1108 ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr); 1109 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1110 1111 ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1112 ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1113 /* supperdiagonal part */ 1114 ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr); 1115 PetscFunctionReturn(0); 1116 } 1117 1118 PetscErrorCode MatMult_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy) 1119 { 1120 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1121 PetscErrorCode ierr; 1122 PetscInt nt; 1123 1124 PetscFunctionBegin; 1125 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 1126 if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx"); 1127 1128 ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr); 1129 if (nt != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy"); 1130 1131 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1132 /* do diagonal part */ 1133 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 1134 /* do supperdiagonal part */ 1135 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1136 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 1137 /* do subdiagonal part */ 1138 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1139 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1140 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1141 PetscFunctionReturn(0); 1142 } 1143 1144 PetscErrorCode MatMultAdd_MPISBAIJ_Hermitian(Mat A,Vec xx,Vec yy,Vec zz) 1145 { 1146 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1147 PetscErrorCode ierr; 1148 PetscInt mbs=a->mbs,bs=A->rmap->bs; 1149 PetscScalar *from,zero=0.0; 1150 const PetscScalar *x; 1151 1152 PetscFunctionBegin; 1153 /* diagonal part */ 1154 ierr = (*a->A->ops->multadd)(a->A,xx,yy,a->slvec1a);CHKERRQ(ierr); 1155 
ierr = VecSet(a->slvec1b,zero);CHKERRQ(ierr); 1156 1157 /* subdiagonal part */ 1158 ierr = (*a->B->ops->multhermitiantranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr); 1159 1160 /* copy x into the vec slvec0 */ 1161 ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr); 1162 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1163 ierr = PetscArraycpy(from,x,bs*mbs);CHKERRQ(ierr); 1164 ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr); 1165 1166 ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1167 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1168 ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1169 1170 /* supperdiagonal part */ 1171 ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,zz);CHKERRQ(ierr); 1172 PetscFunctionReturn(0); 1173 } 1174 1175 PetscErrorCode MatMultAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1176 { 1177 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1178 PetscErrorCode ierr; 1179 PetscInt mbs=a->mbs,bs=A->rmap->bs; 1180 PetscScalar *from,zero=0.0; 1181 const PetscScalar *x; 1182 1183 PetscFunctionBegin; 1184 /* diagonal part */ 1185 ierr = (*a->A->ops->multadd)(a->A,xx,yy,a->slvec1a);CHKERRQ(ierr); 1186 ierr = VecSet(a->slvec1b,zero);CHKERRQ(ierr); 1187 1188 /* subdiagonal part */ 1189 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr); 1190 1191 /* copy x into the vec slvec0 */ 1192 ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr); 1193 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1194 ierr = PetscArraycpy(from,x,bs*mbs);CHKERRQ(ierr); 1195 ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr); 1196 1197 ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1198 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1199 ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1200 1201 /* supperdiagonal part */ 1202 ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,zz);CHKERRQ(ierr); 1203 PetscFunctionReturn(0); 1204 } 1205 1206 PetscErrorCode MatMultAdd_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy,Vec zz) 1207 { 1208 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1209 PetscErrorCode ierr; 1210 1211 PetscFunctionBegin; 1212 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1213 /* do diagonal part */ 1214 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1215 /* do supperdiagonal part */ 1216 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1217 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 1218 1219 /* do subdiagonal part */ 1220 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1221 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1222 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1223 PetscFunctionReturn(0); 1224 } 1225 1226 /* 1227 This only works correctly for square matrices where the subblock A->A is the 1228 diagonal block 1229 */ 1230 PetscErrorCode MatGetDiagonal_MPISBAIJ(Mat A,Vec v) 1231 { 1232 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1233 PetscErrorCode ierr; 1234 1235 PetscFunctionBegin; 1236 /* if (a->rmap->N != a->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */ 1237 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 1238 PetscFunctionReturn(0); 1239 } 1240 1241 PetscErrorCode MatScale_MPISBAIJ(Mat A,PetscScalar aa) 1242 { 1243 Mat_MPISBAIJ *a 
= (Mat_MPISBAIJ*)A->data; 1244 PetscErrorCode ierr; 1245 1246 PetscFunctionBegin; 1247 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 1248 ierr = MatScale(a->B,aa);CHKERRQ(ierr); 1249 PetscFunctionReturn(0); 1250 } 1251 1252 PetscErrorCode MatGetRow_MPISBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1253 { 1254 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data; 1255 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1256 PetscErrorCode ierr; 1257 PetscInt bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB; 1258 PetscInt nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend; 1259 PetscInt *cmap,*idx_p,cstart = mat->rstartbs; 1260 1261 PetscFunctionBegin; 1262 if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1263 mat->getrowactive = PETSC_TRUE; 1264 1265 if (!mat->rowvalues && (idx || v)) { 1266 /* 1267 allocate enough space to hold information from the longest row. 1268 */ 1269 Mat_SeqSBAIJ *Aa = (Mat_SeqSBAIJ*)mat->A->data; 1270 Mat_SeqBAIJ *Ba = (Mat_SeqBAIJ*)mat->B->data; 1271 PetscInt max = 1,mbs = mat->mbs,tmp; 1272 for (i=0; i<mbs; i++) { 1273 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; /* row length */ 1274 if (max < tmp) max = tmp; 1275 } 1276 ierr = PetscMalloc2(max*bs2,&mat->rowvalues,max*bs2,&mat->rowindices);CHKERRQ(ierr); 1277 } 1278 1279 if (row < brstart || row >= brend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows"); 1280 lrow = row - brstart; /* local row index */ 1281 1282 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1283 if (!v) {pvA = 0; pvB = 0;} 1284 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1285 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1286 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1287 nztot = nzA + nzB; 1288 1289 cmap = mat->garray; 1290 if (v || idx) { 1291 if (nztot) { 1292 /* Sort by increasing column numbers, assuming A and B already sorted */ 1293 PetscInt imark = -1; 1294 if (v) { 1295 *v = v_p = mat->rowvalues; 1296 for (i=0; i<nzB; i++) { 1297 if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i]; 1298 else break; 1299 } 1300 imark = i; 1301 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1302 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1303 } 1304 if (idx) { 1305 *idx = idx_p = mat->rowindices; 1306 if (imark > -1) { 1307 for (i=0; i<imark; i++) { 1308 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs; 1309 } 1310 } else { 1311 for (i=0; i<nzB; i++) { 1312 if (cmap[cworkB[i]/bs] < cstart) idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs; 1313 else break; 1314 } 1315 imark = i; 1316 } 1317 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart*bs + cworkA[i]; 1318 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1319 } 1320 } else { 1321 if (idx) *idx = 0; 1322 if (v) *v = 0; 1323 } 1324 } 1325 *nz = nztot; 1326 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1327 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1328 PetscFunctionReturn(0); 1329 } 1330 1331 PetscErrorCode MatRestoreRow_MPISBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1332 { 1333 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 1334 1335 PetscFunctionBegin; 1336 if (!baij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1337 baij->getrowactive = PETSC_FALSE; 1338 PetscFunctionReturn(0); 1339 } 1340 1341 PetscErrorCode MatGetRowUpperTriangular_MPISBAIJ(Mat A) 1342 { 
1343 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1344 Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data; 1345 1346 PetscFunctionBegin; 1347 aA->getrow_utriangular = PETSC_TRUE; 1348 PetscFunctionReturn(0); 1349 } 1350 PetscErrorCode MatRestoreRowUpperTriangular_MPISBAIJ(Mat A) 1351 { 1352 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1353 Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data; 1354 1355 PetscFunctionBegin; 1356 aA->getrow_utriangular = PETSC_FALSE; 1357 PetscFunctionReturn(0); 1358 } 1359 1360 PetscErrorCode MatRealPart_MPISBAIJ(Mat A) 1361 { 1362 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1363 PetscErrorCode ierr; 1364 1365 PetscFunctionBegin; 1366 ierr = MatRealPart(a->A);CHKERRQ(ierr); 1367 ierr = MatRealPart(a->B);CHKERRQ(ierr); 1368 PetscFunctionReturn(0); 1369 } 1370 1371 PetscErrorCode MatImaginaryPart_MPISBAIJ(Mat A) 1372 { 1373 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1374 PetscErrorCode ierr; 1375 1376 PetscFunctionBegin; 1377 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 1378 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 1379 PetscFunctionReturn(0); 1380 } 1381 1382 /* Check if isrow is a subset of iscol_local, called by MatCreateSubMatrix_MPISBAIJ() 1383 Input: isrow - distributed(parallel), 1384 iscol_local - locally owned (seq) 1385 */ 1386 PetscErrorCode ISEqual_private(IS isrow,IS iscol_local,PetscBool *flg) 1387 { 1388 PetscErrorCode ierr; 1389 PetscInt sz1,sz2,*a1,*a2,i,j,k,nmatch; 1390 const PetscInt *ptr1,*ptr2; 1391 1392 PetscFunctionBegin; 1393 ierr = ISGetLocalSize(isrow,&sz1);CHKERRQ(ierr); 1394 ierr = ISGetLocalSize(iscol_local,&sz2);CHKERRQ(ierr); 1395 if (sz1 > sz2) { 1396 *flg = PETSC_FALSE; 1397 PetscFunctionReturn(0); 1398 } 1399 1400 ierr = ISGetIndices(isrow,&ptr1);CHKERRQ(ierr); 1401 ierr = ISGetIndices(iscol_local,&ptr2);CHKERRQ(ierr); 1402 1403 ierr = PetscMalloc1(sz1,&a1);CHKERRQ(ierr); 1404 ierr = PetscMalloc1(sz2,&a2);CHKERRQ(ierr); 1405 ierr = PetscArraycpy(a1,ptr1,sz1);CHKERRQ(ierr); 1406 ierr = PetscArraycpy(a2,ptr2,sz2);CHKERRQ(ierr); 1407 ierr = PetscSortInt(sz1,a1);CHKERRQ(ierr); 1408 ierr = PetscSortInt(sz2,a2);CHKERRQ(ierr); 1409 1410 nmatch=0; 1411 k = 0; 1412 for (i=0; i<sz1; i++){ 1413 for (j=k; j<sz2; j++){ 1414 if (a1[i] == a2[j]) { 1415 k = j; nmatch++; 1416 break; 1417 } 1418 } 1419 } 1420 ierr = ISRestoreIndices(isrow,&ptr1);CHKERRQ(ierr); 1421 ierr = ISRestoreIndices(iscol_local,&ptr2);CHKERRQ(ierr); 1422 ierr = PetscFree(a1);CHKERRQ(ierr); 1423 ierr = PetscFree(a2);CHKERRQ(ierr); 1424 if (nmatch < sz1) { 1425 *flg = PETSC_FALSE; 1426 } else { 1427 *flg = PETSC_TRUE; 1428 } 1429 PetscFunctionReturn(0); 1430 } 1431 1432 PetscErrorCode MatCreateSubMatrix_MPISBAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 1433 { 1434 PetscErrorCode ierr; 1435 IS iscol_local; 1436 PetscInt csize; 1437 PetscBool isequal; 1438 1439 PetscFunctionBegin; 1440 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 1441 if (call == MAT_REUSE_MATRIX) { 1442 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 1443 if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 1444 } else { 1445 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 1446 ierr = ISEqual_private(isrow,iscol_local,&isequal);CHKERRQ(ierr); 1447 if (!isequal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"For symmetric format, iscol must equal isrow"); 1448 } 1449 1450 /* now call MatCreateSubMatrix_MPIBAIJ() */ 1451 ierr = 
MatCreateSubMatrix_MPIBAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 1452 if (call == MAT_INITIAL_MATRIX) { 1453 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 1454 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 1455 } 1456 PetscFunctionReturn(0); 1457 } 1458 1459 PetscErrorCode MatZeroEntries_MPISBAIJ(Mat A) 1460 { 1461 Mat_MPISBAIJ *l = (Mat_MPISBAIJ*)A->data; 1462 PetscErrorCode ierr; 1463 1464 PetscFunctionBegin; 1465 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 1466 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 1467 PetscFunctionReturn(0); 1468 } 1469 1470 PetscErrorCode MatGetInfo_MPISBAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1471 { 1472 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)matin->data; 1473 Mat A = a->A,B = a->B; 1474 PetscErrorCode ierr; 1475 PetscLogDouble isend[5],irecv[5]; 1476 1477 PetscFunctionBegin; 1478 info->block_size = (PetscReal)matin->rmap->bs; 1479 1480 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1481 1482 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1483 isend[3] = info->memory; isend[4] = info->mallocs; 1484 1485 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1486 1487 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1488 isend[3] += info->memory; isend[4] += info->mallocs; 1489 if (flag == MAT_LOCAL) { 1490 info->nz_used = isend[0]; 1491 info->nz_allocated = isend[1]; 1492 info->nz_unneeded = isend[2]; 1493 info->memory = isend[3]; 1494 info->mallocs = isend[4]; 1495 } else if (flag == MAT_GLOBAL_MAX) { 1496 ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1497 1498 info->nz_used = irecv[0]; 1499 info->nz_allocated = irecv[1]; 1500 info->nz_unneeded = irecv[2]; 1501 info->memory = irecv[3]; 1502 info->mallocs = irecv[4]; 1503 } else if (flag == MAT_GLOBAL_SUM) { 1504 ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1505 1506 info->nz_used = irecv[0]; 1507 info->nz_allocated = irecv[1]; 1508 info->nz_unneeded = irecv[2]; 1509 info->memory = irecv[3]; 1510 info->mallocs = irecv[4]; 1511 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag); 1512 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1513 info->fill_ratio_needed = 0; 1514 info->factor_mallocs = 0; 1515 PetscFunctionReturn(0); 1516 } 1517 1518 PetscErrorCode MatSetOption_MPISBAIJ(Mat A,MatOption op,PetscBool flg) 1519 { 1520 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1521 Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data; 1522 PetscErrorCode ierr; 1523 1524 PetscFunctionBegin; 1525 switch (op) { 1526 case MAT_NEW_NONZERO_LOCATIONS: 1527 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1528 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1529 case MAT_KEEP_NONZERO_PATTERN: 1530 case MAT_SUBMAT_SINGLEIS: 1531 case MAT_NEW_NONZERO_LOCATION_ERR: 1532 MatCheckPreallocated(A,1); 1533 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1534 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1535 break; 1536 case MAT_ROW_ORIENTED: 1537 MatCheckPreallocated(A,1); 1538 a->roworiented = flg; 1539 1540 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1541 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1542 break; 1543 case MAT_NEW_DIAGONALS: 1544 case MAT_SORTED_FULL: 1545 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1546 break; 1547 case MAT_IGNORE_OFF_PROC_ENTRIES: 1548 
a->donotstash = flg; 1549 break; 1550 case MAT_USE_HASH_TABLE: 1551 a->ht_flag = flg; 1552 break; 1553 case MAT_HERMITIAN: 1554 MatCheckPreallocated(A,1); 1555 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1556 #if defined(PETSC_USE_COMPLEX) 1557 if (flg) { /* need different mat-vec ops */ 1558 A->ops->mult = MatMult_MPISBAIJ_Hermitian; 1559 A->ops->multadd = MatMultAdd_MPISBAIJ_Hermitian; 1560 A->ops->multtranspose = NULL; 1561 A->ops->multtransposeadd = NULL; 1562 A->symmetric = PETSC_FALSE; 1563 } 1564 #endif 1565 break; 1566 case MAT_SPD: 1567 case MAT_SYMMETRIC: 1568 MatCheckPreallocated(A,1); 1569 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1570 #if defined(PETSC_USE_COMPLEX) 1571 if (flg) { /* restore to use default mat-vec ops */ 1572 A->ops->mult = MatMult_MPISBAIJ; 1573 A->ops->multadd = MatMultAdd_MPISBAIJ; 1574 A->ops->multtranspose = MatMult_MPISBAIJ; 1575 A->ops->multtransposeadd = MatMultAdd_MPISBAIJ; 1576 } 1577 #endif 1578 break; 1579 case MAT_STRUCTURALLY_SYMMETRIC: 1580 MatCheckPreallocated(A,1); 1581 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1582 break; 1583 case MAT_SYMMETRY_ETERNAL: 1584 if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Matrix must be symmetric"); 1585 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1586 break; 1587 case MAT_IGNORE_LOWER_TRIANGULAR: 1588 aA->ignore_ltriangular = flg; 1589 break; 1590 case MAT_ERROR_LOWER_TRIANGULAR: 1591 aA->ignore_ltriangular = flg; 1592 break; 1593 case MAT_GETROW_UPPERTRIANGULAR: 1594 aA->getrow_utriangular = flg; 1595 break; 1596 default: 1597 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1598 } 1599 PetscFunctionReturn(0); 1600 } 1601 1602 PetscErrorCode MatTranspose_MPISBAIJ(Mat A,MatReuse reuse,Mat *B) 1603 { 1604 PetscErrorCode ierr; 1605 1606 PetscFunctionBegin; 1607 if (reuse == MAT_INITIAL_MATRIX) { 1608 ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr); 1609 } else if (reuse == MAT_REUSE_MATRIX) { 1610 ierr = MatCopy(A,*B,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 1611 } 1612 PetscFunctionReturn(0); 1613 } 1614 1615 PetscErrorCode MatDiagonalScale_MPISBAIJ(Mat mat,Vec ll,Vec rr) 1616 { 1617 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 1618 Mat a = baij->A, b=baij->B; 1619 PetscErrorCode ierr; 1620 PetscInt nv,m,n; 1621 PetscBool flg; 1622 1623 PetscFunctionBegin; 1624 if (ll != rr) { 1625 ierr = VecEqual(ll,rr,&flg);CHKERRQ(ierr); 1626 if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be same\n"); 1627 } 1628 if (!ll) PetscFunctionReturn(0); 1629 1630 ierr = MatGetLocalSize(mat,&m,&n);CHKERRQ(ierr); 1631 if (m != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"For symmetric format, local size %d %d must be same",m,n); 1632 1633 ierr = VecGetLocalSize(rr,&nv);CHKERRQ(ierr); 1634 if (nv!=n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left and right vector non-conforming local size"); 1635 1636 ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1637 1638 /* left diagonalscale the off-diagonal part */ 1639 ierr = (*b->ops->diagonalscale)(b,ll,NULL);CHKERRQ(ierr); 1640 1641 /* scale the diagonal part */ 1642 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1643 1644 /* right diagonalscale the off-diagonal part */ 1645 ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1646 ierr = (*b->ops->diagonalscale)(b,NULL,baij->lvec);CHKERRQ(ierr); 1647 PetscFunctionReturn(0); 1648 } 1649 1650 PetscErrorCode 
MatSetUnfactored_MPISBAIJ(Mat A) 1651 { 1652 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1653 PetscErrorCode ierr; 1654 1655 PetscFunctionBegin; 1656 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1657 PetscFunctionReturn(0); 1658 } 1659 1660 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat,MatDuplicateOption,Mat*); 1661 1662 PetscErrorCode MatEqual_MPISBAIJ(Mat A,Mat B,PetscBool *flag) 1663 { 1664 Mat_MPISBAIJ *matB = (Mat_MPISBAIJ*)B->data,*matA = (Mat_MPISBAIJ*)A->data; 1665 Mat a,b,c,d; 1666 PetscBool flg; 1667 PetscErrorCode ierr; 1668 1669 PetscFunctionBegin; 1670 a = matA->A; b = matA->B; 1671 c = matB->A; d = matB->B; 1672 1673 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1674 if (flg) { 1675 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1676 } 1677 ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 1678 PetscFunctionReturn(0); 1679 } 1680 1681 PetscErrorCode MatCopy_MPISBAIJ(Mat A,Mat B,MatStructure str) 1682 { 1683 PetscErrorCode ierr; 1684 PetscBool isbaij; 1685 1686 PetscFunctionBegin; 1687 ierr = PetscObjectTypeCompareAny((PetscObject)B,&isbaij,MATSEQSBAIJ,MATMPISBAIJ,"");CHKERRQ(ierr); 1688 if (!isbaij) SETERRQ1(PetscObjectComm((PetscObject)B),PETSC_ERR_SUP,"Not for matrix type %s",((PetscObject)B)->type_name); 1689 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 1690 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 1691 ierr = MatGetRowUpperTriangular(A);CHKERRQ(ierr); 1692 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1693 ierr = MatRestoreRowUpperTriangular(A);CHKERRQ(ierr); 1694 } else { 1695 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1696 Mat_MPISBAIJ *b = (Mat_MPISBAIJ*)B->data; 1697 1698 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 1699 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 1700 } 1701 ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr); 1702 PetscFunctionReturn(0); 1703 } 1704 1705 PetscErrorCode MatSetUp_MPISBAIJ(Mat A) 1706 { 1707 PetscErrorCode ierr; 1708 1709 PetscFunctionBegin; 1710 ierr = MatMPISBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1711 PetscFunctionReturn(0); 1712 } 1713 1714 PetscErrorCode MatAXPY_MPISBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 1715 { 1716 PetscErrorCode ierr; 1717 Mat_MPISBAIJ *xx=(Mat_MPISBAIJ*)X->data,*yy=(Mat_MPISBAIJ*)Y->data; 1718 PetscBLASInt bnz,one=1; 1719 Mat_SeqSBAIJ *xa,*ya; 1720 Mat_SeqBAIJ *xb,*yb; 1721 1722 PetscFunctionBegin; 1723 if (str == SAME_NONZERO_PATTERN) { 1724 PetscScalar alpha = a; 1725 xa = (Mat_SeqSBAIJ*)xx->A->data; 1726 ya = (Mat_SeqSBAIJ*)yy->A->data; 1727 ierr = PetscBLASIntCast(xa->nz,&bnz);CHKERRQ(ierr); 1728 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,xa->a,&one,ya->a,&one)); 1729 xb = (Mat_SeqBAIJ*)xx->B->data; 1730 yb = (Mat_SeqBAIJ*)yy->B->data; 1731 ierr = PetscBLASIntCast(xb->nz,&bnz);CHKERRQ(ierr); 1732 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,xb->a,&one,yb->a,&one)); 1733 ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr); 1734 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 1735 ierr = MatSetOption(X,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE);CHKERRQ(ierr); 1736 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 1737 ierr = MatSetOption(X,MAT_GETROW_UPPERTRIANGULAR,PETSC_FALSE);CHKERRQ(ierr); 1738 } else { 1739 Mat B; 1740 PetscInt *nnz_d,*nnz_o,bs=Y->rmap->bs; 1741 if (bs != X->rmap->bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrices must have same block 
size"); 1742 ierr = MatGetRowUpperTriangular(X);CHKERRQ(ierr); 1743 ierr = MatGetRowUpperTriangular(Y);CHKERRQ(ierr); 1744 ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr); 1745 ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr); 1746 ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr); 1747 ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 1748 ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr); 1749 ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr); 1750 ierr = MatSetType(B,MATMPISBAIJ);CHKERRQ(ierr); 1751 ierr = MatAXPYGetPreallocation_SeqSBAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr); 1752 ierr = MatAXPYGetPreallocation_MPIBAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr); 1753 ierr = MatMPISBAIJSetPreallocation(B,bs,0,nnz_d,0,nnz_o);CHKERRQ(ierr); 1754 ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 1755 ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr); 1756 ierr = PetscFree(nnz_d);CHKERRQ(ierr); 1757 ierr = PetscFree(nnz_o);CHKERRQ(ierr); 1758 ierr = MatRestoreRowUpperTriangular(X);CHKERRQ(ierr); 1759 ierr = MatRestoreRowUpperTriangular(Y);CHKERRQ(ierr); 1760 } 1761 PetscFunctionReturn(0); 1762 } 1763 1764 PetscErrorCode MatCreateSubMatrices_MPISBAIJ(Mat A,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat *B[]) 1765 { 1766 PetscErrorCode ierr; 1767 PetscInt i; 1768 PetscBool flg; 1769 1770 PetscFunctionBegin; 1771 ierr = MatCreateSubMatrices_MPIBAIJ(A,n,irow,icol,scall,B);CHKERRQ(ierr); /* B[] are sbaij matrices */ 1772 for (i=0; i<n; i++) { 1773 ierr = ISEqual(irow[i],icol[i],&flg);CHKERRQ(ierr); 1774 if (!flg) { 1775 ierr = MatSeqSBAIJZeroOps_Private(*B[i]);CHKERRQ(ierr); 1776 } 1777 } 1778 PetscFunctionReturn(0); 1779 } 1780 1781 PetscErrorCode MatShift_MPISBAIJ(Mat Y,PetscScalar a) 1782 { 1783 PetscErrorCode ierr; 1784 Mat_MPISBAIJ *maij = (Mat_MPISBAIJ*)Y->data; 1785 Mat_SeqSBAIJ *aij = (Mat_SeqSBAIJ*)maij->A->data; 1786 1787 PetscFunctionBegin; 1788 if (!Y->preallocated) { 1789 ierr = MatMPISBAIJSetPreallocation(Y,Y->rmap->bs,1,NULL,0,NULL);CHKERRQ(ierr); 1790 } else if (!aij->nz) { 1791 PetscInt nonew = aij->nonew; 1792 ierr = MatSeqSBAIJSetPreallocation(maij->A,Y->rmap->bs,1,NULL);CHKERRQ(ierr); 1793 aij->nonew = nonew; 1794 } 1795 ierr = MatShift_Basic(Y,a);CHKERRQ(ierr); 1796 PetscFunctionReturn(0); 1797 } 1798 1799 PetscErrorCode MatMissingDiagonal_MPISBAIJ(Mat A,PetscBool *missing,PetscInt *d) 1800 { 1801 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1802 PetscErrorCode ierr; 1803 1804 PetscFunctionBegin; 1805 if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices"); 1806 ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr); 1807 if (d) { 1808 PetscInt rstart; 1809 ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr); 1810 *d += rstart/A->rmap->bs; 1811 1812 } 1813 PetscFunctionReturn(0); 1814 } 1815 1816 PetscErrorCode MatGetDiagonalBlock_MPISBAIJ(Mat A,Mat *a) 1817 { 1818 PetscFunctionBegin; 1819 *a = ((Mat_MPISBAIJ*)A->data)->A; 1820 PetscFunctionReturn(0); 1821 } 1822 1823 /* -------------------------------------------------------------------*/ 1824 static struct _MatOps MatOps_Values = {MatSetValues_MPISBAIJ, 1825 MatGetRow_MPISBAIJ, 1826 MatRestoreRow_MPISBAIJ, 1827 MatMult_MPISBAIJ, 1828 /* 4*/ MatMultAdd_MPISBAIJ, 1829 MatMult_MPISBAIJ, /* transpose versions are same as non-transpose */ 1830 MatMultAdd_MPISBAIJ, 1831 0, 1832 0, 1833 0, 1834 /* 10*/ 0, 1835 0, 1836 0, 1837 MatSOR_MPISBAIJ, 1838 
MatTranspose_MPISBAIJ, 1839 /* 15*/ MatGetInfo_MPISBAIJ, 1840 MatEqual_MPISBAIJ, 1841 MatGetDiagonal_MPISBAIJ, 1842 MatDiagonalScale_MPISBAIJ, 1843 MatNorm_MPISBAIJ, 1844 /* 20*/ MatAssemblyBegin_MPISBAIJ, 1845 MatAssemblyEnd_MPISBAIJ, 1846 MatSetOption_MPISBAIJ, 1847 MatZeroEntries_MPISBAIJ, 1848 /* 24*/ 0, 1849 0, 1850 0, 1851 0, 1852 0, 1853 /* 29*/ MatSetUp_MPISBAIJ, 1854 0, 1855 0, 1856 MatGetDiagonalBlock_MPISBAIJ, 1857 0, 1858 /* 34*/ MatDuplicate_MPISBAIJ, 1859 0, 1860 0, 1861 0, 1862 0, 1863 /* 39*/ MatAXPY_MPISBAIJ, 1864 MatCreateSubMatrices_MPISBAIJ, 1865 MatIncreaseOverlap_MPISBAIJ, 1866 MatGetValues_MPISBAIJ, 1867 MatCopy_MPISBAIJ, 1868 /* 44*/ 0, 1869 MatScale_MPISBAIJ, 1870 MatShift_MPISBAIJ, 1871 0, 1872 0, 1873 /* 49*/ 0, 1874 0, 1875 0, 1876 0, 1877 0, 1878 /* 54*/ 0, 1879 0, 1880 MatSetUnfactored_MPISBAIJ, 1881 0, 1882 MatSetValuesBlocked_MPISBAIJ, 1883 /* 59*/ MatCreateSubMatrix_MPISBAIJ, 1884 0, 1885 0, 1886 0, 1887 0, 1888 /* 64*/ 0, 1889 0, 1890 0, 1891 0, 1892 0, 1893 /* 69*/ MatGetRowMaxAbs_MPISBAIJ, 1894 0, 1895 MatConvert_MPISBAIJ_Basic, 1896 0, 1897 0, 1898 /* 74*/ 0, 1899 0, 1900 0, 1901 0, 1902 0, 1903 /* 79*/ 0, 1904 0, 1905 0, 1906 0, 1907 MatLoad_MPISBAIJ, 1908 /* 84*/ 0, 1909 0, 1910 0, 1911 0, 1912 0, 1913 /* 89*/ 0, 1914 0, 1915 0, 1916 0, 1917 0, 1918 /* 94*/ 0, 1919 0, 1920 0, 1921 0, 1922 0, 1923 /* 99*/ 0, 1924 0, 1925 0, 1926 0, 1927 0, 1928 /*104*/ 0, 1929 MatRealPart_MPISBAIJ, 1930 MatImaginaryPart_MPISBAIJ, 1931 MatGetRowUpperTriangular_MPISBAIJ, 1932 MatRestoreRowUpperTriangular_MPISBAIJ, 1933 /*109*/ 0, 1934 0, 1935 0, 1936 0, 1937 MatMissingDiagonal_MPISBAIJ, 1938 /*114*/ 0, 1939 0, 1940 0, 1941 0, 1942 0, 1943 /*119*/ 0, 1944 0, 1945 0, 1946 0, 1947 0, 1948 /*124*/ 0, 1949 0, 1950 0, 1951 0, 1952 0, 1953 /*129*/ 0, 1954 0, 1955 0, 1956 0, 1957 0, 1958 /*134*/ 0, 1959 0, 1960 0, 1961 0, 1962 0, 1963 /*139*/ MatSetBlockSizes_Default, 1964 0, 1965 0, 1966 0, 1967 0, 1968 /*144*/MatCreateMPIMatConcatenateSeqMat_MPISBAIJ 1969 }; 1970 1971 PetscErrorCode MatMPISBAIJSetPreallocation_MPISBAIJ(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt *d_nnz,PetscInt o_nz,const PetscInt *o_nnz) 1972 { 1973 Mat_MPISBAIJ *b = (Mat_MPISBAIJ*)B->data; 1974 PetscErrorCode ierr; 1975 PetscInt i,mbs,Mbs; 1976 PetscMPIInt size; 1977 1978 PetscFunctionBegin; 1979 ierr = MatSetBlockSize(B,PetscAbs(bs));CHKERRQ(ierr); 1980 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 1981 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 1982 ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr); 1983 if (B->rmap->N > B->cmap->N) SETERRQ2(PetscObjectComm((PetscObject)B),PETSC_ERR_SUP,"MPISBAIJ matrix cannot have more rows %D than columns %D",B->rmap->N,B->cmap->N); 1984 if (B->rmap->n > B->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"MPISBAIJ matrix cannot have more local rows %D than columns %D",B->rmap->n,B->cmap->n); 1985 1986 mbs = B->rmap->n/bs; 1987 Mbs = B->rmap->N/bs; 1988 if (mbs*bs != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"No of local rows %D must be divisible by blocksize %D",B->rmap->N,bs); 1989 1990 B->rmap->bs = bs; 1991 b->bs2 = bs*bs; 1992 b->mbs = mbs; 1993 b->Mbs = Mbs; 1994 b->nbs = B->cmap->n/bs; 1995 b->Nbs = B->cmap->N/bs; 1996 1997 for (i=0; i<=b->size; i++) { 1998 b->rangebs[i] = B->rmap->range[i]/bs; 1999 } 2000 b->rstartbs = B->rmap->rstart/bs; 2001 b->rendbs = B->rmap->rend/bs; 2002 2003 b->cstartbs = B->cmap->rstart/bs; 2004 b->cendbs = B->cmap->rend/bs; 2005 2006 #if defined(PETSC_USE_CTABLE) 2007 ierr = 
PetscTableDestroy(&b->colmap);CHKERRQ(ierr); 2008 #else 2009 ierr = PetscFree(b->colmap);CHKERRQ(ierr); 2010 #endif 2011 ierr = PetscFree(b->garray);CHKERRQ(ierr); 2012 ierr = VecDestroy(&b->lvec);CHKERRQ(ierr); 2013 ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr); 2014 ierr = VecDestroy(&b->slvec0);CHKERRQ(ierr); 2015 ierr = VecDestroy(&b->slvec0b);CHKERRQ(ierr); 2016 ierr = VecDestroy(&b->slvec1);CHKERRQ(ierr); 2017 ierr = VecDestroy(&b->slvec1a);CHKERRQ(ierr); 2018 ierr = VecDestroy(&b->slvec1b);CHKERRQ(ierr); 2019 ierr = VecScatterDestroy(&b->sMvctx);CHKERRQ(ierr); 2020 2021 /* Because the B will have been resized we simply destroy it and create a new one each time */ 2022 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr); 2023 ierr = MatDestroy(&b->B);CHKERRQ(ierr); 2024 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 2025 ierr = MatSetSizes(b->B,B->rmap->n,size > 1 ? B->cmap->N : 0,B->rmap->n,size > 1 ? B->cmap->N : 0);CHKERRQ(ierr); 2026 ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr); 2027 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr); 2028 2029 if (!B->preallocated) { 2030 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 2031 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 2032 ierr = MatSetType(b->A,MATSEQSBAIJ);CHKERRQ(ierr); 2033 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr); 2034 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),bs,&B->bstash);CHKERRQ(ierr); 2035 } 2036 2037 ierr = MatSeqSBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr); 2038 ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr); 2039 2040 B->preallocated = PETSC_TRUE; 2041 B->was_assembled = PETSC_FALSE; 2042 B->assembled = PETSC_FALSE; 2043 PetscFunctionReturn(0); 2044 } 2045 2046 PetscErrorCode MatMPISBAIJSetPreallocationCSR_MPISBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[]) 2047 { 2048 PetscInt m,rstart,cend; 2049 PetscInt i,j,d,nz,bd, nz_max=0,*d_nnz=0,*o_nnz=0; 2050 const PetscInt *JJ =0; 2051 PetscScalar *values=0; 2052 PetscBool roworiented = ((Mat_MPISBAIJ*)B->data)->roworiented; 2053 PetscErrorCode ierr; 2054 PetscBool nooffprocentries; 2055 2056 PetscFunctionBegin; 2057 if (bs < 1) SETERRQ1(PetscObjectComm((PetscObject)B),PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs); 2058 ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 2059 ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 2060 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2061 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2062 ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr); 2063 m = B->rmap->n/bs; 2064 rstart = B->rmap->rstart/bs; 2065 cend = B->cmap->rend/bs; 2066 2067 if (ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %D",ii[0]); 2068 ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr); 2069 for (i=0; i<m; i++) { 2070 nz = ii[i+1] - ii[i]; 2071 if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nz); 2072 /* count the ones on the diagonal and above, split into diagonal and off diagonal portions. 
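Block columns below the diagonal (counted in bd) are skipped entirely, since only the upper triangle is stored and MAT_IGNORE_LOWER_TRIANGULAR is set below; d counts block columns at or above the diagonal that fall inside this process's diagonal block range, and the remainder goes to o_nnz.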
*/ 2073 JJ = jj + ii[i]; 2074 bd = 0; 2075 for (j=0; j<nz; j++) { 2076 if (*JJ >= i + rstart) break; 2077 JJ++; 2078 bd++; 2079 } 2080 d = 0; 2081 for (; j<nz; j++) { 2082 if (*JJ++ >= cend) break; 2083 d++; 2084 } 2085 d_nnz[i] = d; 2086 o_nnz[i] = nz - d - bd; 2087 nz = nz - bd; 2088 nz_max = PetscMax(nz_max,nz); 2089 } 2090 ierr = MatMPISBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 2091 ierr = MatSetOption(B,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE);CHKERRQ(ierr); 2092 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 2093 2094 values = (PetscScalar*)V; 2095 if (!values) { 2096 ierr = PetscCalloc1(bs*bs*nz_max,&values);CHKERRQ(ierr); 2097 } 2098 for (i=0; i<m; i++) { 2099 PetscInt row = i + rstart; 2100 PetscInt ncols = ii[i+1] - ii[i]; 2101 const PetscInt *icols = jj + ii[i]; 2102 if (bs == 1 || !roworiented) { /* block ordering matches the non-nested layout of MatSetValues so we can insert entire rows */ 2103 const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0); 2104 ierr = MatSetValuesBlocked_MPISBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr); 2105 } else { /* block ordering does not match so we can only insert one block at a time. */ 2106 PetscInt j; 2107 for (j=0; j<ncols; j++) { 2108 const PetscScalar *svals = values + (V ? (bs*bs*(ii[i]+j)) : 0); 2109 ierr = MatSetValuesBlocked_MPISBAIJ(B,1,&row,1,&icols[j],svals,INSERT_VALUES);CHKERRQ(ierr); 2110 } 2111 } 2112 } 2113 2114 if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); } 2115 nooffprocentries = B->nooffprocentries; 2116 B->nooffprocentries = PETSC_TRUE; 2117 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2118 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2119 B->nooffprocentries = nooffprocentries; 2120 2121 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 2122 PetscFunctionReturn(0); 2123 } 2124 2125 /*MC 2126 MATMPISBAIJ - MATMPISBAIJ = "mpisbaij" - A matrix type to be used for distributed symmetric sparse block matrices, 2127 based on block compressed sparse row format. Only the upper triangular portion of the "diagonal" portion of 2128 the matrix is stored. 2129 2130 For complex numbers by default this matrix is symmetric, NOT Hermitian symmetric. To make it Hermitian symmetric you 2131 can call MatSetOption(Mat, MAT_HERMITIAN, PETSC_TRUE); 2132 2133 Options Database Keys: 2134 . -mat_type mpisbaij - sets the matrix type to "mpisbaij" during a call to MatSetFromOptions() 2135 2136 Notes: 2137 The number of rows in the matrix must be less than or equal to the number of columns. Similarly the number of rows in the 2138 diagonal portion of the matrix of each process must be less than or equal to the number of columns.
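   A minimal usage sketch (error checking omitted; m, n, M, N, bs, d_nz and o_nz stand for user-chosen sizes and preallocation counts):
.vb
   Mat A;
   MatCreate(PETSC_COMM_WORLD,&A);
   MatSetSizes(A,m,n,M,N);
   MatSetType(A,MATMPISBAIJ);
   MatMPISBAIJSetPreallocation(A,bs,d_nz,NULL,o_nz,NULL);
   /* for complex numbers, optionally make the matrix Hermitian rather than symmetric */
   MatSetOption(A,MAT_HERMITIAN,PETSC_TRUE);
.ve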
2139 2140 Level: beginner 2141 2142 .seealso: MatCreateBAIJ(), MATSEQSBAIJ, MatType 2143 M*/ 2144 2145 PETSC_EXTERN PetscErrorCode MatCreate_MPISBAIJ(Mat B) 2146 { 2147 Mat_MPISBAIJ *b; 2148 PetscErrorCode ierr; 2149 PetscBool flg = PETSC_FALSE; 2150 2151 PetscFunctionBegin; 2152 ierr = PetscNewLog(B,&b);CHKERRQ(ierr); 2153 B->data = (void*)b; 2154 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 2155 2156 B->ops->destroy = MatDestroy_MPISBAIJ; 2157 B->ops->view = MatView_MPISBAIJ; 2158 B->assembled = PETSC_FALSE; 2159 B->insertmode = NOT_SET_VALUES; 2160 2161 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr); 2162 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&b->size);CHKERRQ(ierr); 2163 2164 /* build local table of row and column ownerships */ 2165 ierr = PetscMalloc1(b->size+2,&b->rangebs);CHKERRQ(ierr); 2166 2167 /* build cache for off array entries formed */ 2168 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr); 2169 2170 b->donotstash = PETSC_FALSE; 2171 b->colmap = NULL; 2172 b->garray = NULL; 2173 b->roworiented = PETSC_TRUE; 2174 2175 /* stuff used in block assembly */ 2176 b->barray = 0; 2177 2178 /* stuff used for matrix vector multiply */ 2179 b->lvec = 0; 2180 b->Mvctx = 0; 2181 b->slvec0 = 0; 2182 b->slvec0b = 0; 2183 b->slvec1 = 0; 2184 b->slvec1a = 0; 2185 b->slvec1b = 0; 2186 b->sMvctx = 0; 2187 2188 /* stuff for MatGetRow() */ 2189 b->rowindices = 0; 2190 b->rowvalues = 0; 2191 b->getrowactive = PETSC_FALSE; 2192 2193 /* hash table stuff */ 2194 b->ht = 0; 2195 b->hd = 0; 2196 b->ht_size = 0; 2197 b->ht_flag = PETSC_FALSE; 2198 b->ht_fact = 0; 2199 b->ht_total_ct = 0; 2200 b->ht_insert_ct = 0; 2201 2202 /* stuff for MatCreateSubMatrices_MPIBAIJ_local() */ 2203 b->ijonly = PETSC_FALSE; 2204 2205 b->in_loc = 0; 2206 b->v_loc = 0; 2207 b->n_loc = 0; 2208 2209 ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPISBAIJ);CHKERRQ(ierr); 2210 ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPISBAIJ);CHKERRQ(ierr); 2211 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPISBAIJSetPreallocation_C",MatMPISBAIJSetPreallocation_MPISBAIJ);CHKERRQ(ierr); 2212 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPISBAIJSetPreallocationCSR_C",MatMPISBAIJSetPreallocationCSR_MPISBAIJ);CHKERRQ(ierr); 2213 #if defined(PETSC_HAVE_ELEMENTAL) 2214 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpisbaij_elemental_C",MatConvert_MPISBAIJ_Elemental);CHKERRQ(ierr); 2215 #endif 2216 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpisbaij_mpiaij_C",MatConvert_MPISBAIJ_Basic);CHKERRQ(ierr); 2217 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpisbaij_mpibaij_C",MatConvert_MPISBAIJ_Basic);CHKERRQ(ierr); 2218 2219 B->symmetric = PETSC_TRUE; 2220 B->structurally_symmetric = PETSC_TRUE; 2221 B->symmetric_set = PETSC_TRUE; 2222 B->structurally_symmetric_set = PETSC_TRUE; 2223 B->symmetric_eternal = PETSC_TRUE; 2224 #if defined(PETSC_USE_COMPLEX) 2225 B->hermitian = PETSC_FALSE; 2226 B->hermitian_set = PETSC_FALSE; 2227 #else 2228 B->hermitian = PETSC_TRUE; 2229 B->hermitian_set = PETSC_TRUE; 2230 #endif 2231 2232 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPISBAIJ);CHKERRQ(ierr); 2233 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Options for loading MPISBAIJ matrix 1","Mat");CHKERRQ(ierr); 2234 ierr = PetscOptionsBool("-mat_use_hash_table","Use hash table to 
save memory in constructing matrix","MatSetOption",flg,&flg,NULL);CHKERRQ(ierr); 2235 if (flg) { 2236 PetscReal fact = 1.39; 2237 ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr); 2238 ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,NULL);CHKERRQ(ierr); 2239 if (fact <= 1.0) fact = 1.39; 2240 ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr); 2241 ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr); 2242 } 2243 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2244 PetscFunctionReturn(0); 2245 } 2246 2247 /*MC 2248 MATSBAIJ - MATSBAIJ = "sbaij" - A matrix type to be used for symmetric block sparse matrices. 2249 2250 This matrix type is identical to MATSEQSBAIJ when constructed with a single process communicator, 2251 and MATMPISBAIJ otherwise. 2252 2253 Options Database Keys: 2254 . -mat_type sbaij - sets the matrix type to "sbaij" during a call to MatSetFromOptions() 2255 2256 Level: beginner 2257 2258 .seealso: MatCreateMPISBAIJ, MATSEQSBAIJ, MATMPISBAIJ 2259 M*/ 2260 2261 /*@C 2262 MatMPISBAIJSetPreallocation - For good matrix assembly performance 2263 the user should preallocate the matrix storage by setting the parameters 2264 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2265 performance can be increased by more than a factor of 50. 2266 2267 Collective on Mat 2268 2269 Input Parameters: 2270 + B - the matrix 2271 . bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row 2272 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs() 2273 . d_nz - number of block nonzeros per block row in diagonal portion of local 2274 submatrix (same for all local rows) 2275 . d_nnz - array containing the number of block nonzeros in the various block rows 2276 in the upper triangular and diagonal part of the diagonal portion of the local submatrix 2277 (possibly different for each block row) or NULL. If you plan to factor the matrix you must leave room 2278 for the diagonal entry and set a value even if it is zero. 2279 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 2280 submatrix (same for all local rows). 2281 - o_nnz - array containing the number of nonzeros in the various block rows of the 2282 off-diagonal portion of the local submatrix that is right of the diagonal 2283 (possibly different for each block row) or NULL. 2284 2285 2286 Options Database Keys: 2287 + -mat_no_unroll - uses code that does not unroll the loops in the 2288 block calculations (much slower) 2289 - -mat_block_size - size of the blocks to use 2290 2291 Notes: 2292 2293 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 2294 then it must be used on all processors that share the object for that argument. 2295 2296 If the *_nnz parameter is given then the *_nz parameter is ignored 2297 2298 Storage Information: 2299 For a square global matrix we define each processor's diagonal portion 2300 to be its local rows and the corresponding columns (a square submatrix); 2301 each processor's off-diagonal portion encompasses the remainder of the 2302 local matrix (a rectangular submatrix). 2303 2304 The user can specify preallocated storage for the diagonal part of 2305 the local submatrix with either d_nz or d_nnz (not both).
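   For example, with a block size of 2 one might reserve three blocks per block row in the diagonal portion and one block per block row in the off-diagonal portion (a minimal sketch; the appropriate counts depend on the actual nonzero pattern):
.vb
   MatMPISBAIJSetPreallocation(B,2,3,NULL,1,NULL);
.ve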
Set 2306 d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic 2307 memory allocation. Likewise, specify preallocated storage for the 2308 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 2309 2310 You can call MatGetInfo() to get information on how effective the preallocation was; 2311 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 2312 You can also run with the option -info and look for messages with the string 2313 malloc in them to see if additional memory allocation was needed. 2314 2315 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 2316 the figure below we depict these three local rows and all columns (0-11). 2317 2318 .vb 2319 0 1 2 3 4 5 6 7 8 9 10 11 2320 -------------------------- 2321 row 3 |. . . d d d o o o o o o 2322 row 4 |. . . d d d o o o o o o 2323 row 5 |. . . d d d o o o o o o 2324 -------------------------- 2325 .ve 2326 2327 Thus, any entries in the d locations are stored in the d (diagonal) 2328 submatrix, and any entries in the o locations are stored in the 2329 o (off-diagonal) submatrix. Note that the d matrix is stored in 2330 MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format. 2331 2332 Now d_nz should indicate the number of block nonzeros per row in the upper triangular 2333 plus the diagonal part of the d matrix, 2334 and o_nz should indicate the number of block nonzeros per row in the o matrix 2335 2336 In general, for PDE problems in which most nonzeros are near the diagonal, 2337 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 2338 or you will get TERRIBLE performance; see the users' manual chapter on 2339 matrices. 2340 2341 Level: intermediate 2342 2343 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateBAIJ(), PetscSplitOwnership() 2344 @*/ 2345 PetscErrorCode MatMPISBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 2346 { 2347 PetscErrorCode ierr; 2348 2349 PetscFunctionBegin; 2350 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 2351 PetscValidType(B,1); 2352 PetscValidLogicalCollectiveInt(B,bs,2); 2353 ierr = PetscTryMethod(B,"MatMPISBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,bs,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr); 2354 PetscFunctionReturn(0); 2355 } 2356 2357 /*@C 2358 MatCreateSBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format 2359 (block compressed row). For good matrix assembly performance 2360 the user should preallocate the matrix storage by setting the parameters 2361 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2362 performance can be increased by more than a factor of 50. 2363 2364 Collective 2365 2366 Input Parameters: 2367 + comm - MPI communicator 2368 . bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row 2369 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs() 2370 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 2371 This value should be the same as the local size used in creating the 2372 y vector for the matrix-vector product y = Ax. 2373 . n - number of local columns (or PETSC_DECIDE to have calculated if N is given) 2374 This value should be the same as the local size used in creating the 2375 x vector for the matrix-vector product y = Ax. 2376 . 
M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 2377 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 2378 . d_nz - number of block nonzeros per block row in diagonal portion of local 2379 submatrix (same for all local rows) 2380 . d_nnz - array containing the number of block nonzeros in the various block rows 2381 in the upper triangular portion of the diagonal portion of the local submatrix 2382 (possibly different for each block row) or NULL. 2383 If you plan to factor the matrix you must leave room for the diagonal entry and 2384 set its value even if it is zero. 2385 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 2386 submatrix (same for all local rows). 2387 - o_nnz - array containing the number of nonzeros in the various block rows of the 2388 off-diagonal portion of the local submatrix (possibly different for 2389 each block row) or NULL. 2390 2391 Output Parameter: 2392 . A - the matrix 2393 2394 Options Database Keys: 2395 + -mat_no_unroll - uses code that does not unroll the loops in the 2396 block calculations (much slower) 2397 . -mat_block_size - size of the blocks to use 2398 - -mat_mpi - use the parallel matrix data structures even on one processor 2399 (defaults to using SeqBAIJ format on one processor) 2400 2401 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 2402 MatXXXXSetPreallocation() paradigm instead of this routine directly. 2403 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 2404 2405 Notes: 2406 The number of rows and columns must be divisible by blocksize. 2407 This matrix type does not support complex Hermitian operations. 2408 2409 The user MUST specify either the local or global matrix dimensions 2410 (possibly both). 2411 2412 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 2413 then it must be used on all processors that share the object for that argument. 2414 2415 If the *_nnz parameter is given then the *_nz parameter is ignored 2416 2417 Storage Information: 2418 For a square global matrix we define each processor's diagonal portion 2419 to be its local rows and the corresponding columns (a square submatrix); 2420 each processor's off-diagonal portion encompasses the remainder of the 2421 local matrix (a rectangular submatrix). 2422 2423 The user can specify preallocated storage for the diagonal part of 2424 the local submatrix with either d_nz or d_nnz (not both). Set 2425 d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic 2426 memory allocation. Likewise, specify preallocated storage for the 2427 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 2428 2429 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 2430 the figure below we depict these three local rows and all columns (0-11). 2431 2432 .vb 2433 0 1 2 3 4 5 6 7 8 9 10 11 2434 -------------------------- 2435 row 3 |. . . d d d o o o o o o 2436 row 4 |. . . d d d o o o o o o 2437 row 5 |. . . d d d o o o o o o 2438 -------------------------- 2439 .ve 2440 2441 Thus, any entries in the d locations are stored in the d (diagonal) 2442 submatrix, and any entries in the o locations are stored in the 2443 o (off-diagonal) submatrix. Note that the d matrix is stored in 2444 MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format.
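   For example, if bs=1 and every d and o location shown above holds a nonzero, the owning process could pass (a sketch; for the symmetric format only the upper triangle of the d part, including the diagonal, is counted):
.vb
   d_nnz = {3, 2, 1}     o_nnz = {6, 6, 6}
.ve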
2445 2446 Now d_nz should indicate the number of block nonzeros per row in the upper triangular 2447 plus the diagonal part of the d matrix, 2448 and o_nz should indicate the number of block nonzeros per row in the o matrix. 2449 In general, for PDE problems in which most nonzeros are near the diagonal, 2450 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 2451 or you will get TERRIBLE performance; see the users' manual chapter on 2452 matrices. 2453 2454 Level: intermediate 2455 2456 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateBAIJ() 2457 @*/ 2458 2459 PetscErrorCode MatCreateSBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 2460 { 2461 PetscErrorCode ierr; 2462 PetscMPIInt size; 2463 2464 PetscFunctionBegin; 2465 ierr = MatCreate(comm,A);CHKERRQ(ierr); 2466 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 2467 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2468 if (size > 1) { 2469 ierr = MatSetType(*A,MATMPISBAIJ);CHKERRQ(ierr); 2470 ierr = MatMPISBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 2471 } else { 2472 ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr); 2473 ierr = MatSeqSBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr); 2474 } 2475 PetscFunctionReturn(0); 2476 } 2477 2478 2479 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 2480 { 2481 Mat mat; 2482 Mat_MPISBAIJ *a,*oldmat = (Mat_MPISBAIJ*)matin->data; 2483 PetscErrorCode ierr; 2484 PetscInt len=0,nt,bs=matin->rmap->bs,mbs=oldmat->mbs; 2485 PetscScalar *array; 2486 2487 PetscFunctionBegin; 2488 *newmat = 0; 2489 2490 ierr = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr); 2491 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 2492 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 2493 ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr); 2494 ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr); 2495 2496 mat->factortype = matin->factortype; 2497 mat->preallocated = PETSC_TRUE; 2498 mat->assembled = PETSC_TRUE; 2499 mat->insertmode = NOT_SET_VALUES; 2500 2501 a = (Mat_MPISBAIJ*)mat->data; 2502 a->bs2 = oldmat->bs2; 2503 a->mbs = oldmat->mbs; 2504 a->nbs = oldmat->nbs; 2505 a->Mbs = oldmat->Mbs; 2506 a->Nbs = oldmat->Nbs; 2507 2508 a->size = oldmat->size; 2509 a->rank = oldmat->rank; 2510 a->donotstash = oldmat->donotstash; 2511 a->roworiented = oldmat->roworiented; 2512 a->rowindices = 0; 2513 a->rowvalues = 0; 2514 a->getrowactive = PETSC_FALSE; 2515 a->barray = 0; 2516 a->rstartbs = oldmat->rstartbs; 2517 a->rendbs = oldmat->rendbs; 2518 a->cstartbs = oldmat->cstartbs; 2519 a->cendbs = oldmat->cendbs; 2520 2521 /* hash table stuff */ 2522 a->ht = 0; 2523 a->hd = 0; 2524 a->ht_size = 0; 2525 a->ht_flag = oldmat->ht_flag; 2526 a->ht_fact = oldmat->ht_fact; 2527 a->ht_total_ct = 0; 2528 a->ht_insert_ct = 0; 2529 2530 ierr = PetscArraycpy(a->rangebs,oldmat->rangebs,a->size+2);CHKERRQ(ierr); 2531 if (oldmat->colmap) { 2532 #if defined(PETSC_USE_CTABLE) 2533 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 2534 #else 2535 ierr = PetscMalloc1(a->Nbs,&a->colmap);CHKERRQ(ierr); 2536 ierr = PetscLogObjectMemory((PetscObject)mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr); 2537 ierr = PetscArraycpy(a->colmap,oldmat->colmap,a->Nbs);CHKERRQ(ierr); 2538 #endif 2539 } else a->colmap = 0; 2540 
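/* duplicate garray, the map from local block columns of the off-diagonal part B to global block column indices */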
2541 if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) { 2542 ierr = PetscMalloc1(len,&a->garray);CHKERRQ(ierr); 2543 ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr); 2544 ierr = PetscArraycpy(a->garray,oldmat->garray,len);CHKERRQ(ierr); 2545 } else a->garray = 0; 2546 2547 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)matin),matin->rmap->bs,&mat->bstash);CHKERRQ(ierr); 2548 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 2549 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr); 2550 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 2551 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr); 2552 2553 ierr = VecDuplicate(oldmat->slvec0,&a->slvec0);CHKERRQ(ierr); 2554 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0);CHKERRQ(ierr); 2555 ierr = VecDuplicate(oldmat->slvec1,&a->slvec1);CHKERRQ(ierr); 2556 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1);CHKERRQ(ierr); 2557 2558 ierr = VecGetLocalSize(a->slvec1,&nt);CHKERRQ(ierr); 2559 ierr = VecGetArray(a->slvec1,&array);CHKERRQ(ierr); 2560 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,bs*mbs,array,&a->slvec1a);CHKERRQ(ierr); 2561 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,nt-bs*mbs,array+bs*mbs,&a->slvec1b);CHKERRQ(ierr); 2562 ierr = VecRestoreArray(a->slvec1,&array);CHKERRQ(ierr); 2563 ierr = VecGetArray(a->slvec0,&array);CHKERRQ(ierr); 2564 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,nt-bs*mbs,array+bs*mbs,&a->slvec0b);CHKERRQ(ierr); 2565 ierr = VecRestoreArray(a->slvec0,&array);CHKERRQ(ierr); 2566 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0);CHKERRQ(ierr); 2567 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1);CHKERRQ(ierr); 2568 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0b);CHKERRQ(ierr); 2569 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1a);CHKERRQ(ierr); 2570 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1b);CHKERRQ(ierr); 2571 2572 /* ierr = VecScatterCopy(oldmat->sMvctx,&a->sMvctx); - not written yet, replaced by the lazy trick: */ 2573 ierr = PetscObjectReference((PetscObject)oldmat->sMvctx);CHKERRQ(ierr); 2574 a->sMvctx = oldmat->sMvctx; 2575 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->sMvctx);CHKERRQ(ierr); 2576 2577 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 2578 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr); 2579 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 2580 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr); 2581 ierr = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 2582 *newmat = mat; 2583 PetscFunctionReturn(0); 2584 } 2585 2586 /* Used for both MPIBAIJ and MPISBAIJ matrices */ 2587 #define MatLoad_MPISBAIJ_Binary MatLoad_MPIBAIJ_Binary 2588 2589 PetscErrorCode MatLoad_MPISBAIJ(Mat mat,PetscViewer viewer) 2590 { 2591 PetscErrorCode ierr; 2592 PetscBool isbinary; 2593 2594 PetscFunctionBegin; 2595 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 2596 if (!isbinary) SETERRQ2(PetscObjectComm((PetscObject)viewer),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)mat)->type_name); 2597 ierr = MatLoad_MPISBAIJ_Binary(mat,viewer);CHKERRQ(ierr); 2598 
PetscFunctionReturn(0); 2599 } 2600 2601 /*XXXXX@ 2602 MatMPISBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable. 2603 2604 Input Parameters: 2605 . mat - the matrix 2606 . fact - factor 2607 2608 Not Collective on Mat, each process can have a different hash factor 2609 2610 Level: advanced 2611 2612 Notes: 2613 This can also be set by the command line option: -mat_use_hash_table fact 2614 2615 .seealso: MatSetOption() 2616 @XXXXX*/ 2617 2618 2619 PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat A,Vec v,PetscInt idx[]) 2620 { 2621 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 2622 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(a->B)->data; 2623 PetscReal atmp; 2624 PetscReal *work,*svalues,*rvalues; 2625 PetscErrorCode ierr; 2626 PetscInt i,bs,mbs,*bi,*bj,brow,j,ncols,krow,kcol,col,row,Mbs,bcol; 2627 PetscMPIInt rank,size; 2628 PetscInt *rowners_bs,dest,count,source; 2629 PetscScalar *va; 2630 MatScalar *ba; 2631 MPI_Status stat; 2632 2633 PetscFunctionBegin; 2634 if (idx) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Send email to petsc-maint@mcs.anl.gov"); 2635 ierr = MatGetRowMaxAbs(a->A,v,NULL);CHKERRQ(ierr); 2636 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2637 2638 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr); 2639 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr); 2640 2641 bs = A->rmap->bs; 2642 mbs = a->mbs; 2643 Mbs = a->Mbs; 2644 ba = b->a; 2645 bi = b->i; 2646 bj = b->j; 2647 2648 /* find ownerships */ 2649 rowners_bs = A->rmap->range; 2650 2651 /* each proc creates an array to be distributed */ 2652 ierr = PetscCalloc1(bs*Mbs,&work);CHKERRQ(ierr); 2653 2654 /* row_max for B */ 2655 if (rank != size-1) { 2656 for (i=0; i<mbs; i++) { 2657 ncols = bi[1] - bi[0]; bi++; 2658 brow = bs*i; 2659 for (j=0; j<ncols; j++) { 2660 bcol = bs*(*bj); 2661 for (kcol=0; kcol<bs; kcol++) { 2662 col = bcol + kcol; /* local col index */ 2663 col += rowners_bs[rank+1]; /* global col index */ 2664 for (krow=0; krow<bs; krow++) { 2665 atmp = PetscAbsScalar(*ba); ba++; 2666 row = brow + krow; /* local row index */ 2667 if (PetscRealPart(va[row]) < atmp) va[row] = atmp; 2668 if (work[col] < atmp) work[col] = atmp; 2669 } 2670 } 2671 bj++; 2672 } 2673 } 2674 2675 /* send values to its owners */ 2676 for (dest=rank+1; dest<size; dest++) { 2677 svalues = work + rowners_bs[dest]; 2678 count = rowners_bs[dest+1]-rowners_bs[dest]; 2679 ierr = MPI_Send(svalues,count,MPIU_REAL,dest,rank,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2680 } 2681 } 2682 2683 /* receive values */ 2684 if (rank) { 2685 rvalues = work; 2686 count = rowners_bs[rank+1]-rowners_bs[rank]; 2687 for (source=0; source<rank; source++) { 2688 ierr = MPI_Recv(rvalues,count,MPIU_REAL,MPI_ANY_SOURCE,MPI_ANY_TAG,PetscObjectComm((PetscObject)A),&stat);CHKERRQ(ierr); 2689 /* process values */ 2690 for (i=0; i<count; i++) { 2691 if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i]; 2692 } 2693 } 2694 } 2695 2696 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2697 ierr = PetscFree(work);CHKERRQ(ierr); 2698 PetscFunctionReturn(0); 2699 } 2700 2701 PetscErrorCode MatSOR_MPISBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 2702 { 2703 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data; 2704 PetscErrorCode ierr; 2705 PetscInt mbs=mat->mbs,bs=matin->rmap->bs; 2706 PetscScalar *x,*ptr,*from; 2707 Vec bb1; 2708 const PetscScalar *b; 2709 2710 PetscFunctionBegin; 2711 if (its <= 0 || lits <= 0) 
SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 2712 if (bs > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented"); 2713 2714 if (flag == SOR_APPLY_UPPER) { 2715 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2716 PetscFunctionReturn(0); 2717 } 2718 2719 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 2720 if (flag & SOR_ZERO_INITIAL_GUESS) { 2721 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr); 2722 its--; 2723 } 2724 2725 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 2726 while (its--) { 2727 2728 /* lower triangular part: slvec0b = - B^T*xx */ 2729 ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr); 2730 2731 /* copy xx into slvec0a */ 2732 ierr = VecGetArray(mat->slvec0,&ptr);CHKERRQ(ierr); 2733 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 2734 ierr = PetscArraycpy(ptr,x,bs*mbs);CHKERRQ(ierr); 2735 ierr = VecRestoreArray(mat->slvec0,&ptr);CHKERRQ(ierr); 2736 2737 ierr = VecScale(mat->slvec0,-1.0);CHKERRQ(ierr); 2738 2739 /* copy bb into slvec1a */ 2740 ierr = VecGetArray(mat->slvec1,&ptr);CHKERRQ(ierr); 2741 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 2742 ierr = PetscArraycpy(ptr,b,bs*mbs);CHKERRQ(ierr); 2743 ierr = VecRestoreArray(mat->slvec1,&ptr);CHKERRQ(ierr); 2744 2745 /* set slvec1b = 0 */ 2746 ierr = VecSet(mat->slvec1b,0.0);CHKERRQ(ierr); 2747 2748 ierr = VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2749 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 2750 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 2751 ierr = VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2752 2753 /* upper triangular part: bb1 = bb1 - B*x */ 2754 ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,bb1);CHKERRQ(ierr); 2755 2756 /* local diagonal sweep */ 2757 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr); 2758 } 2759 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 2760 } else if ((flag & SOR_LOCAL_FORWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) { 2761 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2762 } else if ((flag & SOR_LOCAL_BACKWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) { 2763 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2764 } else if (flag & SOR_EISENSTAT) { 2765 Vec xx1; 2766 PetscBool hasop; 2767 const PetscScalar *diag; 2768 PetscScalar *sl,scale = (omega - 2.0)/omega; 2769 PetscInt i,n; 2770 2771 if (!mat->xx1) { 2772 ierr = VecDuplicate(bb,&mat->xx1);CHKERRQ(ierr); 2773 ierr = VecDuplicate(bb,&mat->bb1);CHKERRQ(ierr); 2774 } 2775 xx1 = mat->xx1; 2776 bb1 = mat->bb1; 2777 2778 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr); 2779 2780 if (!mat->diag) { 2781 /* this is wrong for same matrix with new nonzero values */ 2782 ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr); 2783 ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr); 2784 } 2785 ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr); 2786 2787 if (hasop) { 2788 ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr); 2789 ierr = VecAYPX(mat->slvec1a,scale,bb);CHKERRQ(ierr); 2790 } else { 2791 /* 2792 These two lines are replaced by code 
that may be a bit faster for a good compiler 2793 ierr = VecPointwiseMult(mat->slvec1a,mat->diag,xx);CHKERRQ(ierr); 2794 ierr = VecAYPX(mat->slvec1a,scale,bb);CHKERRQ(ierr); 2795 */ 2796 ierr = VecGetArray(mat->slvec1a,&sl);CHKERRQ(ierr); 2797 ierr = VecGetArrayRead(mat->diag,&diag);CHKERRQ(ierr); 2798 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 2799 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 2800 ierr = VecGetLocalSize(xx,&n);CHKERRQ(ierr); 2801 if (omega == 1.0) { 2802 for (i=0; i<n; i++) sl[i] = b[i] - diag[i]*x[i]; 2803 ierr = PetscLogFlops(2.0*n);CHKERRQ(ierr); 2804 } else { 2805 for (i=0; i<n; i++) sl[i] = b[i] + scale*diag[i]*x[i]; 2806 ierr = PetscLogFlops(3.0*n);CHKERRQ(ierr); 2807 } 2808 ierr = VecRestoreArray(mat->slvec1a,&sl);CHKERRQ(ierr); 2809 ierr = VecRestoreArrayRead(mat->diag,&diag);CHKERRQ(ierr); 2810 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 2811 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 2812 } 2813 2814 /* multiply off-diagonal portion of matrix */ 2815 ierr = VecSet(mat->slvec1b,0.0);CHKERRQ(ierr); 2816 ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr); 2817 ierr = VecGetArray(mat->slvec0,&from);CHKERRQ(ierr); 2818 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 2819 ierr = PetscArraycpy(from,x,bs*mbs);CHKERRQ(ierr); 2820 ierr = VecRestoreArray(mat->slvec0,&from);CHKERRQ(ierr); 2821 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 2822 ierr = VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2823 ierr = VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2824 ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,mat->slvec1a);CHKERRQ(ierr); 2825 2826 /* local sweep */ 2827 ierr = (*mat->A->ops->sor)(mat->A,mat->slvec1a,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr); 2828 ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr); 2829 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format"); 2830 PetscFunctionReturn(0); 2831 } 2832 2833 PetscErrorCode MatSOR_MPISBAIJ_2comm(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 2834 { 2835 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data; 2836 PetscErrorCode ierr; 2837 Vec lvec1,bb1; 2838 2839 PetscFunctionBegin; 2840 if (its <= 0 || lits <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 2841 if (matin->rmap->bs > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented"); 2842 2843 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 2844 if (flag & SOR_ZERO_INITIAL_GUESS) { 2845 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr); 2846 its--; 2847 } 2848 2849 ierr = VecDuplicate(mat->lvec,&lvec1);CHKERRQ(ierr); 2850 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 2851 while (its--) { 2852 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2853 2854 /* lower diagonal part: bb1 = bb - B^T*xx */ 2855 ierr = (*mat->B->ops->multtranspose)(mat->B,xx,lvec1);CHKERRQ(ierr); 2856 ierr = VecScale(lvec1,-1.0);CHKERRQ(ierr); 2857 2858 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2859 ierr = VecCopy(bb,bb1);CHKERRQ(ierr); 2860 ierr = VecScatterBegin(mat->Mvctx,lvec1,bb1,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2861 2862 /* upper diagonal part: bb1 = 
bb1 - B*x */ 2863 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 2864 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr); 2865 2866 ierr = VecScatterEnd(mat->Mvctx,lvec1,bb1,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2867 2868 /* diagonal sweep */ 2869 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr); 2870 } 2871 ierr = VecDestroy(&lvec1);CHKERRQ(ierr); 2872 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 2873 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format"); 2874 PetscFunctionReturn(0); 2875 } 2876 2877 /*@ 2878 MatCreateMPISBAIJWithArrays - creates an MPI SBAIJ matrix using arrays that contain the local rows in standard 2879 CSR format. 2880 2881 Collective 2882 2883 Input Parameters: 2884 + comm - MPI communicator 2885 . bs - the block size, only a block size of 1 is supported 2886 . m - number of local rows (Cannot be PETSC_DECIDE) 2887 . n - This value should be the same as the local size used in creating the 2888 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 2889 calculated if N is given) For square matrices n is almost always m. 2890 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 2891 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 2892 . i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of block elements in that block row of the matrix 2893 . j - column indices 2894 - a - matrix values 2895 2896 Output Parameter: 2897 . mat - the matrix 2898 2899 Level: intermediate 2900 2901 Notes: 2902 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 2903 thus you CANNOT change the matrix entries by changing the values of a[] after you have 2904 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 2905 2906 The i and j indices are 0-based, and the i indices are offsets into the local j array. 2907 2908 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 2909 MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays() 2910 @*/ 2911 PetscErrorCode MatCreateMPISBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 2912 { 2913 PetscErrorCode ierr; 2914 2915 2916 PetscFunctionBegin; 2917 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 2918 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 2919 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 2920 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 2921 ierr = MatSetType(*mat,MATMPISBAIJ);CHKERRQ(ierr); 2922 ierr = MatMPISBAIJSetPreallocationCSR(*mat,bs,i,j,a);CHKERRQ(ierr); 2923 PetscFunctionReturn(0); 2924 } 2925 2926 2927 /*@C 2928 MatMPISBAIJSetPreallocationCSR - Creates a sparse parallel matrix in SBAIJ format using the given nonzero structure and (optional) numerical values 2929 2930 Collective 2931 2932 Input Parameters: 2933 + B - the matrix 2934 . bs - the block size 2935 . i - the indices into j for the start of each local row (starts with zero) 2936 .
j - the column indices for each local row (starts with zero); these must be sorted for each row 2937 - v - optional values in the matrix 2938 2939 Level: advanced 2940 2941 Notes: 2942 Though this routine has Preallocation() in the name it also sets the exact nonzero locations of the matrix entries 2943 and usually the numerical values as well 2944 2945 Any entries below the diagonal are ignored 2946 2947 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ 2948 @*/ 2949 PetscErrorCode MatMPISBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 2950 { 2951 PetscErrorCode ierr; 2952 2953 PetscFunctionBegin; 2954 ierr = PetscTryMethod(B,"MatMPISBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr); 2955 PetscFunctionReturn(0); 2956 } 2957 2958 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPISBAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 2959 { 2960 PetscErrorCode ierr; 2961 PetscInt m,N,i,rstart,nnz,Ii,bs,cbs; 2962 PetscInt *indx; 2963 PetscScalar *values; 2964 2965 PetscFunctionBegin; 2966 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 2967 if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */ 2968 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)inmat->data; 2969 PetscInt *dnz,*onz,mbs,Nbs,nbs; 2970 PetscInt *bindx,rmax=a->rmax,j; 2971 PetscMPIInt rank,size; 2972 2973 ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr); 2974 mbs = m/bs; Nbs = N/cbs; 2975 if (n == PETSC_DECIDE) { 2976 ierr = PetscSplitOwnershipBlock(comm,cbs,&n,&N);CHKERRQ(ierr); 2977 } 2978 nbs = n/cbs; 2979 2980 ierr = PetscMalloc1(rmax,&bindx);CHKERRQ(ierr); 2981 ierr = MatPreallocateInitialize(comm,mbs,nbs,dnz,onz);CHKERRQ(ierr); /* inline function, output __end and __rstart are used below */ 2982 2983 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2984 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2985 if (rank == size-1) { 2986 /* Check sum(nbs) = Nbs */ 2987 if (__end != Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local block columns %D != global block columns %D",__end,Nbs); 2988 } 2989 2990 rstart = __rstart; /* block rstart of *outmat; see inline function MatPreallocateInitialize */ 2991 ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE);CHKERRQ(ierr); 2992 for (i=0; i<mbs; i++) { 2993 ierr = MatGetRow_SeqSBAIJ(inmat,i*bs,&nnz,&indx,NULL);CHKERRQ(ierr); /* non-blocked nnz and indx */ 2994 nnz = nnz/bs; 2995 for (j=0; j<nnz; j++) bindx[j] = indx[j*bs]/bs; 2996 ierr = MatPreallocateSet(i+rstart,nnz,bindx,dnz,onz);CHKERRQ(ierr); 2997 ierr = MatRestoreRow_SeqSBAIJ(inmat,i*bs,&nnz,&indx,NULL);CHKERRQ(ierr); 2998 } 2999 ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_FALSE);CHKERRQ(ierr); 3000 ierr = PetscFree(bindx);CHKERRQ(ierr); 3001 3002 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 3003 ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 3004 ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr); 3005 ierr = MatSetType(*outmat,MATSBAIJ);CHKERRQ(ierr); 3006 ierr = MatSeqSBAIJSetPreallocation(*outmat,bs,0,dnz);CHKERRQ(ierr); 3007 ierr = MatMPISBAIJSetPreallocation(*outmat,bs,0,dnz,0,onz);CHKERRQ(ierr); 3008 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 3009 } 3010 3011 /* numeric phase */ 3012 ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr); 3013 ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr); 3014 3015 ierr =
MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE);CHKERRQ(ierr); 3016 for (i=0; i<m; i++) { 3017 ierr = MatGetRow_SeqSBAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 3018 Ii = i + rstart; 3019 ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 3020 ierr = MatRestoreRow_SeqSBAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 3021 } 3022 ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_FALSE);CHKERRQ(ierr); 3023 ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3024 ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3025 PetscFunctionReturn(0); 3026 } 3027