#include <../src/mat/impls/baij/mpi/mpibaij.h> /*I "petscmat.h" I*/
#include <../src/mat/impls/sbaij/mpi/mpisbaij.h>
#include <../src/mat/impls/sbaij/seq/sbaij.h>
#include <petscblaslapack.h>

#if defined(PETSC_HAVE_ELEMENTAL)
PETSC_INTERN PetscErrorCode MatConvert_MPISBAIJ_Elemental(Mat,MatType,MatReuse,Mat*);
#endif
#if defined(PETSC_HAVE_SCALAPACK)
PETSC_INTERN PetscErrorCode MatConvert_SBAIJ_ScaLAPACK(Mat,MatType,MatReuse,Mat*);
#endif

/* This could be moved to matimpl.h */
/*
   Preallocates B by inserting the sparsity patterns of the nm matrices X[]
   through a MATPREALLOCATOR matrix.  All X[] must share B's row layout.
   If symm[i] is true the transpose pattern of X[i] is inserted as well
   (used when X[i] stores only the upper triangle).  'fill' is forwarded to
   MatPreallocatorPreallocate() and controls whether values are zeroed in.
*/
static PetscErrorCode MatPreallocateWithMats_Private(Mat B, PetscInt nm, Mat X[], PetscBool symm[], PetscBool fill)
{
  Mat            preallocator;
  PetscInt       r,rstart,rend;
  PetscInt       bs,i,m,n,M,N;
  PetscBool      cong = PETSC_TRUE;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(B,MAT_CLASSID,1);
  PetscValidLogicalCollectiveInt(B,nm,2);
  for (i = 0; i < nm; i++) {
    PetscValidHeaderSpecific(X[i],MAT_CLASSID,3);
    ierr = PetscLayoutCompare(B->rmap,X[i]->rmap,&cong);CHKERRQ(ierr);
    PetscCheckFalse(!cong,PetscObjectComm((PetscObject)B),PETSC_ERR_SUP,"Not for different layouts");
  }
  PetscValidLogicalCollectiveBool(B,fill,5);
  /* mirror B's layout onto the preallocator so row ownership matches */
  ierr = MatGetBlockSize(B,&bs);CHKERRQ(ierr);
  ierr = MatGetSize(B,&M,&N);CHKERRQ(ierr);
  ierr = MatGetLocalSize(B,&m,&n);CHKERRQ(ierr);
  ierr = MatCreate(PetscObjectComm((PetscObject)B),&preallocator);CHKERRQ(ierr);
  ierr = MatSetType(preallocator,MATPREALLOCATOR);CHKERRQ(ierr);
  ierr = MatSetBlockSize(preallocator,bs);CHKERRQ(ierr);
  ierr = MatSetSizes(preallocator,m,n,M,N);CHKERRQ(ierr);
  ierr = MatSetUp(preallocator);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(preallocator,&rstart,&rend);CHKERRQ(ierr);
  for (r = rstart; r < rend; ++r) {
    PetscInt          ncols;
    const PetscInt    *row;
    const PetscScalar *vals;

    for (i = 0; i < nm; i++) {
      ierr = MatGetRow(X[i],r,&ncols,&row,&vals);CHKERRQ(ierr);
      ierr = MatSetValues(preallocator,1,&r,ncols,row,vals,INSERT_VALUES);CHKERRQ(ierr);
      if (symm && symm[i]) {
        /* also insert the transpose pattern (X[i] holds only the upper triangle) */
        ierr = MatSetValues(preallocator,ncols,row,1,&r,vals,INSERT_VALUES);CHKERRQ(ierr);
      }
      ierr = MatRestoreRow(X[i],r,&ncols,&row,&vals);CHKERRQ(ierr);
    }
  }
  ierr = MatAssemblyBegin(preallocator,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(preallocator,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatPreallocatorPreallocate(preallocator,fill,B);CHKERRQ(ierr);
  ierr = MatDestroy(&preallocator);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
   Fallback conversion of an MPISBAIJ matrix (upper triangle stored) to a
   full-storage matrix of type 'newtype' by explicitly inserting both the
   stored upper-triangular entries and their (conjugate) transposes.
   Supports MAT_INITIAL_MATRIX, MAT_REUSE_MATRIX and MAT_INPLACE_MATRIX.
   NOTE(review): the dense-type test lists MATSEQDENSECUDA but not
   MATMPIDENSECUDA -- presumably intentional; confirm against callers.
*/
PETSC_INTERN PetscErrorCode MatConvert_MPISBAIJ_Basic(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
{
  Mat            B;
  PetscErrorCode ierr;
  PetscInt       r;

  PetscFunctionBegin;
  if (reuse != MAT_REUSE_MATRIX) {
    PetscBool symm = PETSC_TRUE,isdense;
    PetscInt  bs;

    ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
    ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
    ierr = MatSetType(B,newtype);CHKERRQ(ierr);
    ierr = MatGetBlockSize(A,&bs);CHKERRQ(ierr);
    ierr = MatSetBlockSize(B,bs);CHKERRQ(ierr);
    ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
    ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
    ierr = PetscObjectTypeCompareAny((PetscObject)B,&isdense,MATSEQDENSE,MATMPIDENSE,MATSEQDENSECUDA,"");CHKERRQ(ierr);
    if (!isdense) {
      /* sparse target: preallocate with the symmetrized pattern of A */
      ierr = MatGetRowUpperTriangular(A);CHKERRQ(ierr);
      ierr = MatPreallocateWithMats_Private(B,1,&A,&symm,PETSC_TRUE);CHKERRQ(ierr);
      ierr = MatRestoreRowUpperTriangular(A);CHKERRQ(ierr);
    } else {
      ierr = MatSetUp(B);CHKERRQ(ierr);
    }
  } else {
    B    = *newmat;
    ierr = MatZeroEntries(B);CHKERRQ(ierr);
  }

  ierr = MatGetRowUpperTriangular(A);CHKERRQ(ierr);
  for (r = A->rmap->rstart; r < A->rmap->rend; r++) {
    PetscInt          ncols;
    const PetscInt    *row;
    const PetscScalar *vals;

    ierr = MatGetRow(A,r,&ncols,&row,&vals);CHKERRQ(ierr);
    ierr = MatSetValues(B,1,&r,ncols,row,vals,INSERT_VALUES);CHKERRQ(ierr);
#if defined(PETSC_USE_COMPLEX)
    if (A->hermitian) {
      /* Hermitian: mirror entries with complex conjugation */
      PetscInt i;
      for (i = 0; i < ncols; i++) {
        ierr = MatSetValue(B,row[i],r,PetscConj(vals[i]),INSERT_VALUES);CHKERRQ(ierr);
      }
    } else {
      ierr = MatSetValues(B,ncols,row,1,&r,vals,INSERT_VALUES);CHKERRQ(ierr);
    }
#else
    /* symmetric: mirror entries below the diagonal */
    ierr = MatSetValues(B,ncols,row,1,&r,vals,INSERT_VALUES);CHKERRQ(ierr);
#endif
    ierr = MatRestoreRow(A,r,&ncols,&row,&vals);CHKERRQ(ierr);
  }
  ierr = MatRestoreRowUpperTriangular(A);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  if (reuse == MAT_INPLACE_MATRIX) {
    ierr = MatHeaderReplace(A,&B);CHKERRQ(ierr);
  } else {
    *newmat = B;
  }
  PetscFunctionReturn(0);
}

/* Save a copy of the current numerical values of both on- and off-diagonal parts */
PetscErrorCode MatStoreValues_MPISBAIJ(Mat mat)
{
  Mat_MPISBAIJ   *aij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
  ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Restore the numerical values previously saved with MatStoreValues_MPISBAIJ() */
PetscErrorCode MatRetrieveValues_MPISBAIJ(Mat mat)
{
  Mat_MPISBAIJ   *aij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
  ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
   Insert/add a single scalar into the on-diagonal (SBAIJ) block A.
   Expects the enclosing function to declare: brow,bcol,ridx,cidx,low,high,t,
   _i,rp,ap,bap,rmax,nrow,N, plus the a*/aa/ai/aj/aimax/ailen aliases.
   Binary-search-then-scan locates the block column; a missing block is
   inserted (shifting later entries up) unless a->nonew forbids it.
*/
#define MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv,orow,ocol) \
  { \
    brow = row/bs;  \
    rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
    rmax = aimax[brow]; nrow = ailen[brow]; \
    bcol = col/bs; \
    ridx = row % bs; cidx = col % bs; \
    low  = 0; high = nrow; \
    while (high-low > 3) { \
      t = (low+high)/2; \
      if (rp[t] > bcol) high = t; \
      else              low  = t; \
    } \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap = ap + bs2*_i + bs*cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value; \
        else                    *bap  = value; \
        goto a_noinsert; \
      } \
    } \
    if (a->nonew == 1) goto a_noinsert; \
    PetscCheckFalse(a->nonew == -1,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \
    N = nrow++ - 1; \
    /* shift up all the later entries in this row */ \
    ierr = PetscArraymove(rp+_i+1,rp+_i,N-_i+1);CHKERRQ(ierr); \
    ierr = PetscArraymove(ap+bs2*(_i+1),ap+bs2*_i,bs2*(N-_i+1));CHKERRQ(ierr); \
    ierr = PetscArrayzero(ap+bs2*_i,bs2);CHKERRQ(ierr); \
    rp[_i]                      = bcol; \
    ap[bs2*_i + bs*cidx + ridx] = value; \
    A->nonzerostate++;\
    a_noinsert:; \
    ailen[brow] = nrow; \
  }

/*
   Same as MatSetValues_SeqSBAIJ_A_Private() but operates on the off-diagonal
   (BAIJ) block B via the b*/ba/bi/bj/bimax/bilen aliases.
*/
#define MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv,orow,ocol) \
  { \
    brow = row/bs;  \
    rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
    rmax = bimax[brow]; nrow = bilen[brow]; \
    bcol = col/bs; \
    ridx = row % bs; cidx = col % bs; \
    low  = 0; high = nrow; \
    while (high-low > 3) { \
      t = (low+high)/2; \
      if (rp[t] > bcol) high = t; \
      else              low  = t; \
    } \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap = ap + bs2*_i + bs*cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value; \
        else                    *bap  = value; \
        goto b_noinsert; \
      } \
    } \
    if (b->nonew == 1) goto b_noinsert; \
    PetscCheckFalse(b->nonew == -1,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \
    N = nrow++ - 1; \
    /* shift up all the later entries in this row */ \
    ierr = PetscArraymove(rp+_i+1,rp+_i,N-_i+1);CHKERRQ(ierr); \
    ierr = PetscArraymove(ap+bs2*(_i+1),ap+bs2*_i,bs2*(N-_i+1));CHKERRQ(ierr); \
    ierr = PetscArrayzero(ap+bs2*_i,bs2);CHKERRQ(ierr); \
    rp[_i]                      = bcol; \
    ap[bs2*_i + bs*cidx + ridx] = value; \
    B->nonzerostate++;\
    b_noinsert:; \
    bilen[brow] = nrow; \
  }

/* Only add/insert a(i,j) with i<=j (blocks).
   Any a(i,j) with i>j input by user is ignored or generates an error
*/
PetscErrorCode MatSetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  MatScalar      value;
  PetscBool      roworiented = baij->roworiented;
  PetscErrorCode ierr;
  PetscInt       i,j,row,col;
  PetscInt       rstart_orig=mat->rmap->rstart;
  PetscInt       rend_orig  =mat->rmap->rend,cstart_orig=mat->cmap->rstart;
  PetscInt       cend_orig  =mat->cmap->rend,bs=mat->rmap->bs;

  /* Some Variables required in the macro */
  Mat          A     = baij->A;
  Mat_SeqSBAIJ *a    = (Mat_SeqSBAIJ*)(A)->data;
  PetscInt     *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
  MatScalar    *aa   =a->a;

  Mat         B     = baij->B;
  Mat_SeqBAIJ *b    = (Mat_SeqBAIJ*)(B)->data;
  PetscInt    *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
  MatScalar   *ba   =b->a;

  PetscInt  *rp,ii,nrow,_i,rmax,N,brow,bcol;
  PetscInt  low,high,t,ridx,cidx,bs2=a->bs2;
  MatScalar *ap,*bap;

  /* for stash */
  PetscInt  n_loc, *in_loc = NULL;
  MatScalar *v_loc = NULL;

  PetscFunctionBegin;
  if (!baij->donotstash) {
    /* grow the scratch buffers used to stash filtered off-process rows */
    if (n > baij->n_loc) {
      ierr = PetscFree(baij->in_loc);CHKERRQ(ierr);
      ierr = PetscFree(baij->v_loc);CHKERRQ(ierr);
      ierr = PetscMalloc1(n,&baij->in_loc);CHKERRQ(ierr);
      ierr = PetscMalloc1(n,&baij->v_loc);CHKERRQ(ierr);

      baij->n_loc = n;
    }
    in_loc = baij->in_loc;
    v_loc  = baij->v_loc;
  }

  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
PetscCheckFalse(im[i] >= mat->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT,im[i],mat->rmap->N-1); 273 if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */ 274 row = im[i] - rstart_orig; /* local row index */ 275 for (j=0; j<n; j++) { 276 if (im[i]/bs > in[j]/bs) { 277 if (a->ignore_ltriangular) { 278 continue; /* ignore lower triangular blocks */ 279 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)"); 280 } 281 if (in[j] >= cstart_orig && in[j] < cend_orig) { /* diag entry (A) */ 282 col = in[j] - cstart_orig; /* local col index */ 283 brow = row/bs; bcol = col/bs; 284 if (brow > bcol) continue; /* ignore lower triangular blocks of A */ 285 if (roworiented) value = v[i*n+j]; 286 else value = v[i+j*m]; 287 MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv,im[i],in[j]); 288 /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 289 } else if (in[j] < 0) continue; 290 else PetscCheckFalse(in[j] >= mat->cmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT,in[j],mat->cmap->N-1); 291 else { /* off-diag entry (B) */ 292 if (mat->was_assembled) { 293 if (!baij->colmap) { 294 ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 295 } 296 #if defined(PETSC_USE_CTABLE) 297 ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr); 298 col = col - 1; 299 #else 300 col = baij->colmap[in[j]/bs] - 1; 301 #endif 302 if (col < 0 && !((Mat_SeqSBAIJ*)(baij->A->data))->nonew) { 303 ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr); 304 col = in[j]; 305 /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */ 306 B = baij->B; 307 b = (Mat_SeqBAIJ*)(B)->data; 308 bimax= 
b->imax;bi=b->i;bilen=b->ilen;bj=b->j; 309 ba = b->a; 310 } else col += in[j]%bs; 311 } else col = in[j]; 312 if (roworiented) value = v[i*n+j]; 313 else value = v[i+j*m]; 314 MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv,im[i],in[j]); 315 /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 316 } 317 } 318 } else { /* off processor entry */ 319 PetscCheckFalse(mat->nooffprocentries,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 320 if (!baij->donotstash) { 321 mat->assembled = PETSC_FALSE; 322 n_loc = 0; 323 for (j=0; j<n; j++) { 324 if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */ 325 in_loc[n_loc] = in[j]; 326 if (roworiented) { 327 v_loc[n_loc] = v[i*n+j]; 328 } else { 329 v_loc[n_loc] = v[j*m+i]; 330 } 331 n_loc++; 332 } 333 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n_loc,in_loc,v_loc,PETSC_FALSE);CHKERRQ(ierr); 334 } 335 } 336 } 337 PetscFunctionReturn(0); 338 } 339 340 static inline PetscErrorCode MatSetValuesBlocked_SeqSBAIJ_Inlined(Mat A,PetscInt row,PetscInt col,const PetscScalar v[],InsertMode is,PetscInt orow,PetscInt ocol) 341 { 342 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 343 PetscErrorCode ierr; 344 PetscInt *rp,low,high,t,ii,jj,nrow,i,rmax,N; 345 PetscInt *imax =a->imax,*ai=a->i,*ailen=a->ilen; 346 PetscInt *aj =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs; 347 PetscBool roworiented=a->roworiented; 348 const PetscScalar *value = v; 349 MatScalar *ap,*aa = a->a,*bap; 350 351 PetscFunctionBegin; 352 if (col < row) { 353 if (a->ignore_ltriangular) PetscFunctionReturn(0); /* ignore lower triangular block */ 354 else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. 
Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)"); 355 } 356 rp = aj + ai[row]; 357 ap = aa + bs2*ai[row]; 358 rmax = imax[row]; 359 nrow = ailen[row]; 360 value = v; 361 low = 0; 362 high = nrow; 363 364 while (high-low > 7) { 365 t = (low+high)/2; 366 if (rp[t] > col) high = t; 367 else low = t; 368 } 369 for (i=low; i<high; i++) { 370 if (rp[i] > col) break; 371 if (rp[i] == col) { 372 bap = ap + bs2*i; 373 if (roworiented) { 374 if (is == ADD_VALUES) { 375 for (ii=0; ii<bs; ii++) { 376 for (jj=ii; jj<bs2; jj+=bs) { 377 bap[jj] += *value++; 378 } 379 } 380 } else { 381 for (ii=0; ii<bs; ii++) { 382 for (jj=ii; jj<bs2; jj+=bs) { 383 bap[jj] = *value++; 384 } 385 } 386 } 387 } else { 388 if (is == ADD_VALUES) { 389 for (ii=0; ii<bs; ii++) { 390 for (jj=0; jj<bs; jj++) { 391 *bap++ += *value++; 392 } 393 } 394 } else { 395 for (ii=0; ii<bs; ii++) { 396 for (jj=0; jj<bs; jj++) { 397 *bap++ = *value++; 398 } 399 } 400 } 401 } 402 goto noinsert2; 403 } 404 } 405 if (nonew == 1) goto noinsert2; 406 PetscCheckFalse(nonew == -1,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new block index nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", orow, ocol); 407 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 408 N = nrow++ - 1; high++; 409 /* shift up all the later entries in this row */ 410 ierr = PetscArraymove(rp+i+1,rp+i,N-i+1);CHKERRQ(ierr); 411 ierr = PetscArraymove(ap+bs2*(i+1),ap+bs2*i,bs2*(N-i+1));CHKERRQ(ierr); 412 rp[i] = col; 413 bap = ap + bs2*i; 414 if (roworiented) { 415 for (ii=0; ii<bs; ii++) { 416 for (jj=ii; jj<bs2; jj+=bs) { 417 bap[jj] = *value++; 418 } 419 } 420 } else { 421 for (ii=0; ii<bs; ii++) { 422 for (jj=0; jj<bs; jj++) { 423 *bap++ = *value++; 424 } 425 } 426 } 427 noinsert2:; 428 ailen[row] = nrow; 429 PetscFunctionReturn(0); 430 } 431 432 /* 433 This routine is exactly duplicated in mpibaij.c 434 
*/ 435 static inline PetscErrorCode MatSetValuesBlocked_SeqBAIJ_Inlined(Mat A,PetscInt row,PetscInt col,const PetscScalar v[],InsertMode is,PetscInt orow,PetscInt ocol) 436 { 437 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 438 PetscInt *rp,low,high,t,ii,jj,nrow,i,rmax,N; 439 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 440 PetscErrorCode ierr; 441 PetscInt *aj =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs; 442 PetscBool roworiented=a->roworiented; 443 const PetscScalar *value = v; 444 MatScalar *ap,*aa = a->a,*bap; 445 446 PetscFunctionBegin; 447 rp = aj + ai[row]; 448 ap = aa + bs2*ai[row]; 449 rmax = imax[row]; 450 nrow = ailen[row]; 451 low = 0; 452 high = nrow; 453 value = v; 454 while (high-low > 7) { 455 t = (low+high)/2; 456 if (rp[t] > col) high = t; 457 else low = t; 458 } 459 for (i=low; i<high; i++) { 460 if (rp[i] > col) break; 461 if (rp[i] == col) { 462 bap = ap + bs2*i; 463 if (roworiented) { 464 if (is == ADD_VALUES) { 465 for (ii=0; ii<bs; ii++) { 466 for (jj=ii; jj<bs2; jj+=bs) { 467 bap[jj] += *value++; 468 } 469 } 470 } else { 471 for (ii=0; ii<bs; ii++) { 472 for (jj=ii; jj<bs2; jj+=bs) { 473 bap[jj] = *value++; 474 } 475 } 476 } 477 } else { 478 if (is == ADD_VALUES) { 479 for (ii=0; ii<bs; ii++,value+=bs) { 480 for (jj=0; jj<bs; jj++) { 481 bap[jj] += value[jj]; 482 } 483 bap += bs; 484 } 485 } else { 486 for (ii=0; ii<bs; ii++,value+=bs) { 487 for (jj=0; jj<bs; jj++) { 488 bap[jj] = value[jj]; 489 } 490 bap += bs; 491 } 492 } 493 } 494 goto noinsert2; 495 } 496 } 497 if (nonew == 1) goto noinsert2; 498 PetscCheckFalse(nonew == -1,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new global block indexed nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", orow, ocol); 499 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 500 N = nrow++ - 1; high++; 501 /* shift up all the later entries in this row */ 502 ierr = PetscArraymove(rp+i+1,rp+i,N-i+1);CHKERRQ(ierr); 503 ierr = 
PetscArraymove(ap+bs2*(i+1),ap+bs2*i,bs2*(N-i+1));CHKERRQ(ierr); 504 rp[i] = col; 505 bap = ap + bs2*i; 506 if (roworiented) { 507 for (ii=0; ii<bs; ii++) { 508 for (jj=ii; jj<bs2; jj+=bs) { 509 bap[jj] = *value++; 510 } 511 } 512 } else { 513 for (ii=0; ii<bs; ii++) { 514 for (jj=0; jj<bs; jj++) { 515 *bap++ = *value++; 516 } 517 } 518 } 519 noinsert2:; 520 ailen[row] = nrow; 521 PetscFunctionReturn(0); 522 } 523 524 /* 525 This routine could be optimized by removing the need for the block copy below and passing stride information 526 to the above inline routines; similarly in MatSetValuesBlocked_MPIBAIJ() 527 */ 528 PetscErrorCode MatSetValuesBlocked_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv) 529 { 530 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 531 const MatScalar *value; 532 MatScalar *barray =baij->barray; 533 PetscBool roworiented = baij->roworiented,ignore_ltriangular = ((Mat_SeqSBAIJ*)baij->A->data)->ignore_ltriangular; 534 PetscErrorCode ierr; 535 PetscInt i,j,ii,jj,row,col,rstart=baij->rstartbs; 536 PetscInt rend=baij->rendbs,cstart=baij->cstartbs,stepval; 537 PetscInt cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2; 538 539 PetscFunctionBegin; 540 if (!barray) { 541 ierr = PetscMalloc1(bs2,&barray);CHKERRQ(ierr); 542 baij->barray = barray; 543 } 544 545 if (roworiented) { 546 stepval = (n-1)*bs; 547 } else { 548 stepval = (m-1)*bs; 549 } 550 for (i=0; i<m; i++) { 551 if (im[i] < 0) continue; 552 PetscCheck(im[i] < baij->Mbs,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block indexed row too large %" PetscInt_FMT " max %" PetscInt_FMT,im[i],baij->Mbs-1); 553 if (im[i] >= rstart && im[i] < rend) { 554 row = im[i] - rstart; 555 for (j=0; j<n; j++) { 556 if (im[i] > in[j]) { 557 if (ignore_ltriangular) continue; /* ignore lower triangular blocks */ 558 else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. 
Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)"); 559 } 560 /* If NumCol = 1 then a copy is not required */ 561 if ((roworiented) && (n == 1)) { 562 barray = (MatScalar*) v + i*bs2; 563 } else if ((!roworiented) && (m == 1)) { 564 barray = (MatScalar*) v + j*bs2; 565 } else { /* Here a copy is required */ 566 if (roworiented) { 567 value = v + i*(stepval+bs)*bs + j*bs; 568 } else { 569 value = v + j*(stepval+bs)*bs + i*bs; 570 } 571 for (ii=0; ii<bs; ii++,value+=stepval) { 572 for (jj=0; jj<bs; jj++) { 573 *barray++ = *value++; 574 } 575 } 576 barray -=bs2; 577 } 578 579 if (in[j] >= cstart && in[j] < cend) { 580 col = in[j] - cstart; 581 ierr = MatSetValuesBlocked_SeqSBAIJ_Inlined(baij->A,row,col,barray,addv,im[i],in[j]);CHKERRQ(ierr); 582 } else if (in[j] < 0) continue; 583 else PetscCheck(in[j] < baij->Nbs,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block indexed column too large %" PetscInt_FMT " max %" PetscInt_FMT,in[j],baij->Nbs-1); 584 else { 585 if (mat->was_assembled) { 586 if (!baij->colmap) { 587 ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 588 } 589 590 #if defined(PETSC_USE_DEBUG) 591 #if defined(PETSC_USE_CTABLE) 592 { PetscInt data; 593 ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr); 594 PetscCheckFalse((data - 1) % bs,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 595 } 596 #else 597 PetscCheckFalse((baij->colmap[in[j]] - 1) % bs,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 598 #endif 599 #endif 600 #if defined(PETSC_USE_CTABLE) 601 ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr); 602 col = (col - 1)/bs; 603 #else 604 col = (baij->colmap[in[j]] - 1)/bs; 605 #endif 606 if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 607 ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr); 608 col = in[j]; 609 } 610 } else col = in[j]; 611 ierr = 
MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B,row,col,barray,addv,im[i],in[j]);CHKERRQ(ierr); 612 } 613 } 614 } else { 615 PetscCheckFalse(mat->nooffprocentries,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process block indexed row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 616 if (!baij->donotstash) { 617 if (roworiented) { 618 ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 619 } else { 620 ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 621 } 622 } 623 } 624 } 625 PetscFunctionReturn(0); 626 } 627 628 PetscErrorCode MatGetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 629 { 630 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 631 PetscErrorCode ierr; 632 PetscInt bs = mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend; 633 PetscInt bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data; 634 635 PetscFunctionBegin; 636 for (i=0; i<m; i++) { 637 if (idxm[i] < 0) continue; /* negative row */ 638 PetscCheck(idxm[i] < mat->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT,idxm[i],mat->rmap->N-1); 639 if (idxm[i] >= bsrstart && idxm[i] < bsrend) { 640 row = idxm[i] - bsrstart; 641 for (j=0; j<n; j++) { 642 if (idxn[j] < 0) continue; /* negative column */ 643 PetscCheck(idxn[j] < mat->cmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT,idxn[j],mat->cmap->N-1); 644 if (idxn[j] >= bscstart && idxn[j] < bscend) { 645 col = idxn[j] - bscstart; 646 ierr = MatGetValues_SeqSBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 647 } else { 648 if (!baij->colmap) { 649 ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 650 } 651 #if defined(PETSC_USE_CTABLE) 652 ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr); 653 
data--; 654 #else 655 data = baij->colmap[idxn[j]/bs]-1; 656 #endif 657 if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0; 658 else { 659 col = data + idxn[j]%bs; 660 ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 661 } 662 } 663 } 664 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 665 } 666 PetscFunctionReturn(0); 667 } 668 669 PetscErrorCode MatNorm_MPISBAIJ(Mat mat,NormType type,PetscReal *norm) 670 { 671 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 672 PetscErrorCode ierr; 673 PetscReal sum[2],*lnorm2; 674 675 PetscFunctionBegin; 676 if (baij->size == 1) { 677 ierr = MatNorm(baij->A,type,norm);CHKERRQ(ierr); 678 } else { 679 if (type == NORM_FROBENIUS) { 680 ierr = PetscMalloc1(2,&lnorm2);CHKERRQ(ierr); 681 ierr = MatNorm(baij->A,type,lnorm2);CHKERRQ(ierr); 682 *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2++; /* squar power of norm(A) */ 683 ierr = MatNorm(baij->B,type,lnorm2);CHKERRQ(ierr); 684 *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2--; /* squar power of norm(B) */ 685 ierr = MPIU_Allreduce(lnorm2,sum,2,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr); 686 *norm = PetscSqrtReal(sum[0] + 2*sum[1]); 687 ierr = PetscFree(lnorm2);CHKERRQ(ierr); 688 } else if (type == NORM_INFINITY || type == NORM_1) { /* max row/column sum */ 689 Mat_SeqSBAIJ *amat=(Mat_SeqSBAIJ*)baij->A->data; 690 Mat_SeqBAIJ *bmat=(Mat_SeqBAIJ*)baij->B->data; 691 PetscReal *rsum,*rsum2,vabs; 692 PetscInt *jj,*garray=baij->garray,rstart=baij->rstartbs,nz; 693 PetscInt brow,bcol,col,bs=baij->A->rmap->bs,row,grow,gcol,mbs=amat->mbs; 694 MatScalar *v; 695 696 ierr = PetscMalloc2(mat->cmap->N,&rsum,mat->cmap->N,&rsum2);CHKERRQ(ierr); 697 ierr = PetscArrayzero(rsum,mat->cmap->N);CHKERRQ(ierr); 698 /* Amat */ 699 v = amat->a; jj = amat->j; 700 for (brow=0; brow<mbs; brow++) { 701 grow = bs*(rstart + brow); 702 nz = amat->i[brow+1] - amat->i[brow]; 703 for (bcol=0; bcol<nz; bcol++) { 704 gcol = 
bs*(rstart + *jj); jj++; 705 for (col=0; col<bs; col++) { 706 for (row=0; row<bs; row++) { 707 vabs = PetscAbsScalar(*v); v++; 708 rsum[gcol+col] += vabs; 709 /* non-diagonal block */ 710 if (bcol > 0 && vabs > 0.0) rsum[grow+row] += vabs; 711 } 712 } 713 } 714 ierr = PetscLogFlops(nz*bs*bs);CHKERRQ(ierr); 715 } 716 /* Bmat */ 717 v = bmat->a; jj = bmat->j; 718 for (brow=0; brow<mbs; brow++) { 719 grow = bs*(rstart + brow); 720 nz = bmat->i[brow+1] - bmat->i[brow]; 721 for (bcol=0; bcol<nz; bcol++) { 722 gcol = bs*garray[*jj]; jj++; 723 for (col=0; col<bs; col++) { 724 for (row=0; row<bs; row++) { 725 vabs = PetscAbsScalar(*v); v++; 726 rsum[gcol+col] += vabs; 727 rsum[grow+row] += vabs; 728 } 729 } 730 } 731 ierr = PetscLogFlops(nz*bs*bs);CHKERRQ(ierr); 732 } 733 ierr = MPIU_Allreduce(rsum,rsum2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr); 734 *norm = 0.0; 735 for (col=0; col<mat->cmap->N; col++) { 736 if (rsum2[col] > *norm) *norm = rsum2[col]; 737 } 738 ierr = PetscFree2(rsum,rsum2);CHKERRQ(ierr); 739 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for this norm yet"); 740 } 741 PetscFunctionReturn(0); 742 } 743 744 PetscErrorCode MatAssemblyBegin_MPISBAIJ(Mat mat,MatAssemblyType mode) 745 { 746 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 747 PetscErrorCode ierr; 748 PetscInt nstash,reallocs; 749 750 PetscFunctionBegin; 751 if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0); 752 753 ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr); 754 ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr); 755 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 756 ierr = PetscInfo(mat,"Stash has %" PetscInt_FMT " entries,uses %" PetscInt_FMT " mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 757 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 758 ierr = PetscInfo(mat,"Block-Stash has %" PetscInt_FMT 
" entries, uses %" PetscInt_FMT " mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 759 PetscFunctionReturn(0); 760 } 761 762 PetscErrorCode MatAssemblyEnd_MPISBAIJ(Mat mat,MatAssemblyType mode) 763 { 764 Mat_MPISBAIJ *baij=(Mat_MPISBAIJ*)mat->data; 765 Mat_SeqSBAIJ *a =(Mat_SeqSBAIJ*)baij->A->data; 766 PetscErrorCode ierr; 767 PetscInt i,j,rstart,ncols,flg,bs2=baij->bs2; 768 PetscInt *row,*col; 769 PetscBool other_disassembled; 770 PetscMPIInt n; 771 PetscBool r1,r2,r3; 772 MatScalar *val; 773 774 /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */ 775 PetscFunctionBegin; 776 if (!baij->donotstash && !mat->nooffprocentries) { 777 while (1) { 778 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 779 if (!flg) break; 780 781 for (i=0; i<n;) { 782 /* Now identify the consecutive vals belonging to the same row */ 783 for (j=i,rstart=row[j]; j<n; j++) { 784 if (row[j] != rstart) break; 785 } 786 if (j < n) ncols = j-i; 787 else ncols = n-i; 788 /* Now assemble all these values with a single function call */ 789 ierr = MatSetValues_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr); 790 i = j; 791 } 792 } 793 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 794 /* Now process the block-stash. 
Since the values are stashed column-oriented, 795 set the roworiented flag to column oriented, and after MatSetValues() 796 restore the original flags */ 797 r1 = baij->roworiented; 798 r2 = a->roworiented; 799 r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented; 800 801 baij->roworiented = PETSC_FALSE; 802 a->roworiented = PETSC_FALSE; 803 804 ((Mat_SeqBAIJ*)baij->B->data)->roworiented = PETSC_FALSE; /* b->roworinted */ 805 while (1) { 806 ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 807 if (!flg) break; 808 809 for (i=0; i<n;) { 810 /* Now identify the consecutive vals belonging to the same row */ 811 for (j=i,rstart=row[j]; j<n; j++) { 812 if (row[j] != rstart) break; 813 } 814 if (j < n) ncols = j-i; 815 else ncols = n-i; 816 ierr = MatSetValuesBlocked_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,mat->insertmode);CHKERRQ(ierr); 817 i = j; 818 } 819 } 820 ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr); 821 822 baij->roworiented = r1; 823 a->roworiented = r2; 824 825 ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworinted */ 826 } 827 828 ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr); 829 ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr); 830 831 /* determine if any processor has disassembled, if so we must 832 also disassemble ourselfs, in order that we may reassemble. 
*/ 833 /* 834 if nonzero structure of submatrix B cannot change then we know that 835 no processor disassembled thus we can skip this stuff 836 */ 837 if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) { 838 ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr); 839 if (mat->was_assembled && !other_disassembled) { 840 ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr); 841 } 842 } 843 844 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 845 ierr = MatSetUpMultiply_MPISBAIJ(mat);CHKERRQ(ierr); /* setup Mvctx and sMvctx */ 846 } 847 ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr); 848 ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr); 849 850 ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr); 851 852 baij->rowvalues = NULL; 853 854 /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */ 855 if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 856 PetscObjectState state = baij->A->nonzerostate + baij->B->nonzerostate; 857 ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr); 858 } 859 PetscFunctionReturn(0); 860 } 861 862 extern PetscErrorCode MatSetValues_MPIBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode); 863 #include <petscdraw.h> 864 static PetscErrorCode MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 865 { 866 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 867 PetscErrorCode ierr; 868 PetscInt bs = mat->rmap->bs; 869 PetscMPIInt rank = baij->rank; 870 PetscBool iascii,isdraw; 871 PetscViewer sviewer; 872 PetscViewerFormat format; 873 874 PetscFunctionBegin; 875 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 876 ierr = 
PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 877 if (iascii) { 878 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 879 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 880 MatInfo info; 881 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRMPI(ierr); 882 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 883 ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr); 884 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " bs %" PetscInt_FMT " mem %g\n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,mat->rmap->bs,(double)info.memory);CHKERRQ(ierr); 885 ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 886 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %" PetscInt_FMT " \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 887 ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 888 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %" PetscInt_FMT " \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 889 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 890 ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr); 891 ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 892 ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr); 893 PetscFunctionReturn(0); 894 } else if (format == PETSC_VIEWER_ASCII_INFO) { 895 ierr = PetscViewerASCIIPrintf(viewer," block size is %" PetscInt_FMT "\n",bs);CHKERRQ(ierr); 896 PetscFunctionReturn(0); 897 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 898 PetscFunctionReturn(0); 899 } 900 } 901 902 if (isdraw) { 903 PetscDraw draw; 904 PetscBool isnull; 905 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 906 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); 907 if (isnull) PetscFunctionReturn(0); 908 } 909 910 { 911 /* 
assemble the entire matrix onto first processor. */ 912 Mat A; 913 Mat_SeqSBAIJ *Aloc; 914 Mat_SeqBAIJ *Bloc; 915 PetscInt M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs; 916 MatScalar *a; 917 const char *matname; 918 919 /* Should this be the same type as mat? */ 920 ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr); 921 if (rank == 0) { 922 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 923 } else { 924 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 925 } 926 ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr); 927 ierr = MatMPISBAIJSetPreallocation(A,mat->rmap->bs,0,NULL,0,NULL);CHKERRQ(ierr); 928 ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 929 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr); 930 931 /* copy over the A part */ 932 Aloc = (Mat_SeqSBAIJ*)baij->A->data; 933 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 934 ierr = PetscMalloc1(bs,&rvals);CHKERRQ(ierr); 935 936 for (i=0; i<mbs; i++) { 937 rvals[0] = bs*(baij->rstartbs + i); 938 for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1; 939 for (j=ai[i]; j<ai[i+1]; j++) { 940 col = (baij->cstartbs+aj[j])*bs; 941 for (k=0; k<bs; k++) { 942 ierr = MatSetValues_MPISBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 943 col++; 944 a += bs; 945 } 946 } 947 } 948 /* copy over the B part */ 949 Bloc = (Mat_SeqBAIJ*)baij->B->data; 950 ai = Bloc->i; aj = Bloc->j; a = Bloc->a; 951 for (i=0; i<mbs; i++) { 952 953 rvals[0] = bs*(baij->rstartbs + i); 954 for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1; 955 for (j=ai[i]; j<ai[i+1]; j++) { 956 col = baij->garray[aj[j]]*bs; 957 for (k=0; k<bs; k++) { 958 ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 959 col++; 960 a += bs; 961 } 962 } 963 } 964 ierr = PetscFree(rvals);CHKERRQ(ierr); 965 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 966 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 967 /* 968 Everyone has to call to draw 
the matrix since the graphics waits are 969 synchronized across all processors that share the PetscDraw object 970 */ 971 ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr); 972 ierr = PetscObjectGetName((PetscObject)mat,&matname);CHKERRQ(ierr); 973 if (rank == 0) { 974 ierr = PetscObjectSetName((PetscObject)((Mat_MPISBAIJ*)(A->data))->A,matname);CHKERRQ(ierr); 975 ierr = MatView_SeqSBAIJ(((Mat_MPISBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 976 } 977 ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr); 978 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 979 ierr = MatDestroy(&A);CHKERRQ(ierr); 980 } 981 PetscFunctionReturn(0); 982 } 983 984 /* Used for both MPIBAIJ and MPISBAIJ matrices */ 985 #define MatView_MPISBAIJ_Binary MatView_MPIBAIJ_Binary 986 987 PetscErrorCode MatView_MPISBAIJ(Mat mat,PetscViewer viewer) 988 { 989 PetscErrorCode ierr; 990 PetscBool iascii,isdraw,issocket,isbinary; 991 992 PetscFunctionBegin; 993 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 994 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 995 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 996 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 997 if (iascii || isdraw || issocket) { 998 ierr = MatView_MPISBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 999 } else if (isbinary) { 1000 ierr = MatView_MPISBAIJ_Binary(mat,viewer);CHKERRQ(ierr); 1001 } 1002 PetscFunctionReturn(0); 1003 } 1004 1005 PetscErrorCode MatDestroy_MPISBAIJ(Mat mat) 1006 { 1007 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 1008 PetscErrorCode ierr; 1009 1010 PetscFunctionBegin; 1011 #if defined(PETSC_USE_LOG) 1012 PetscLogObjectState((PetscObject)mat,"Rows=%" PetscInt_FMT ",Cols=%" PetscInt_FMT,mat->rmap->N,mat->cmap->N); 1013 #endif 1014 ierr = 
MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 1015 ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr); 1016 ierr = MatDestroy(&baij->A);CHKERRQ(ierr); 1017 ierr = MatDestroy(&baij->B);CHKERRQ(ierr); 1018 #if defined(PETSC_USE_CTABLE) 1019 ierr = PetscTableDestroy(&baij->colmap);CHKERRQ(ierr); 1020 #else 1021 ierr = PetscFree(baij->colmap);CHKERRQ(ierr); 1022 #endif 1023 ierr = PetscFree(baij->garray);CHKERRQ(ierr); 1024 ierr = VecDestroy(&baij->lvec);CHKERRQ(ierr); 1025 ierr = VecScatterDestroy(&baij->Mvctx);CHKERRQ(ierr); 1026 ierr = VecDestroy(&baij->slvec0);CHKERRQ(ierr); 1027 ierr = VecDestroy(&baij->slvec0b);CHKERRQ(ierr); 1028 ierr = VecDestroy(&baij->slvec1);CHKERRQ(ierr); 1029 ierr = VecDestroy(&baij->slvec1a);CHKERRQ(ierr); 1030 ierr = VecDestroy(&baij->slvec1b);CHKERRQ(ierr); 1031 ierr = VecScatterDestroy(&baij->sMvctx);CHKERRQ(ierr); 1032 ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr); 1033 ierr = PetscFree(baij->barray);CHKERRQ(ierr); 1034 ierr = PetscFree(baij->hd);CHKERRQ(ierr); 1035 ierr = VecDestroy(&baij->diag);CHKERRQ(ierr); 1036 ierr = VecDestroy(&baij->bb1);CHKERRQ(ierr); 1037 ierr = VecDestroy(&baij->xx1);CHKERRQ(ierr); 1038 #if defined(PETSC_USE_REAL_MAT_SINGLE) 1039 ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr); 1040 #endif 1041 ierr = PetscFree(baij->in_loc);CHKERRQ(ierr); 1042 ierr = PetscFree(baij->v_loc);CHKERRQ(ierr); 1043 ierr = PetscFree(baij->rangebs);CHKERRQ(ierr); 1044 ierr = PetscFree(mat->data);CHKERRQ(ierr); 1045 1046 ierr = PetscObjectChangeTypeName((PetscObject)mat,NULL);CHKERRQ(ierr); 1047 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr); 1048 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr); 1049 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPISBAIJSetPreallocation_C",NULL);CHKERRQ(ierr); 1050 ierr = 
PetscObjectComposeFunction((PetscObject)mat,"MatMPISBAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr); 1051 #if defined(PETSC_HAVE_ELEMENTAL) 1052 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpisbaij_elemental_C",NULL);CHKERRQ(ierr); 1053 #endif 1054 #if defined(PETSC_HAVE_SCALAPACK) 1055 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpisbaij_scalapack_C",NULL);CHKERRQ(ierr); 1056 #endif 1057 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpisbaij_mpiaij_C",NULL);CHKERRQ(ierr); 1058 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpisbaij_mpibaij_C",NULL);CHKERRQ(ierr); 1059 PetscFunctionReturn(0); 1060 } 1061 1062 PetscErrorCode MatMult_MPISBAIJ_Hermitian(Mat A,Vec xx,Vec yy) 1063 { 1064 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1065 PetscErrorCode ierr; 1066 PetscInt mbs=a->mbs,bs=A->rmap->bs; 1067 PetscScalar *from; 1068 const PetscScalar *x; 1069 1070 PetscFunctionBegin; 1071 /* diagonal part */ 1072 ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr); 1073 ierr = VecSet(a->slvec1b,0.0);CHKERRQ(ierr); 1074 1075 /* subdiagonal part */ 1076 PetscCheckFalse(!a->B->ops->multhermitiantranspose,PetscObjectComm((PetscObject)a->B),PETSC_ERR_SUP,"Not for type %s",((PetscObject)a->B)->type_name); 1077 ierr = (*a->B->ops->multhermitiantranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr); 1078 1079 /* copy x into the vec slvec0 */ 1080 ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr); 1081 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1082 1083 ierr = PetscArraycpy(from,x,bs*mbs);CHKERRQ(ierr); 1084 ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr); 1085 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1086 1087 ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1088 ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1089 /* supperdiagonal part */ 1090 ierr = 
(*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr); 1091 PetscFunctionReturn(0); 1092 } 1093 1094 PetscErrorCode MatMult_MPISBAIJ(Mat A,Vec xx,Vec yy) 1095 { 1096 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1097 PetscErrorCode ierr; 1098 PetscInt mbs=a->mbs,bs=A->rmap->bs; 1099 PetscScalar *from; 1100 const PetscScalar *x; 1101 1102 PetscFunctionBegin; 1103 /* diagonal part */ 1104 ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr); 1105 ierr = VecSet(a->slvec1b,0.0);CHKERRQ(ierr); 1106 1107 /* subdiagonal part */ 1108 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr); 1109 1110 /* copy x into the vec slvec0 */ 1111 ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr); 1112 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1113 1114 ierr = PetscArraycpy(from,x,bs*mbs);CHKERRQ(ierr); 1115 ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr); 1116 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1117 1118 ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1119 ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1120 /* supperdiagonal part */ 1121 ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr); 1122 PetscFunctionReturn(0); 1123 } 1124 1125 PetscErrorCode MatMultAdd_MPISBAIJ_Hermitian(Mat A,Vec xx,Vec yy,Vec zz) 1126 { 1127 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1128 PetscErrorCode ierr; 1129 PetscInt mbs=a->mbs,bs=A->rmap->bs; 1130 PetscScalar *from,zero=0.0; 1131 const PetscScalar *x; 1132 1133 PetscFunctionBegin; 1134 /* diagonal part */ 1135 ierr = (*a->A->ops->multadd)(a->A,xx,yy,a->slvec1a);CHKERRQ(ierr); 1136 ierr = VecSet(a->slvec1b,zero);CHKERRQ(ierr); 1137 1138 /* subdiagonal part */ 1139 PetscCheckFalse(!a->B->ops->multhermitiantranspose,PetscObjectComm((PetscObject)a->B),PETSC_ERR_SUP,"Not for type %s",((PetscObject)a->B)->type_name); 1140 ierr = 
(*a->B->ops->multhermitiantranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr); 1141 1142 /* copy x into the vec slvec0 */ 1143 ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr); 1144 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1145 ierr = PetscArraycpy(from,x,bs*mbs);CHKERRQ(ierr); 1146 ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr); 1147 1148 ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1149 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1150 ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1151 1152 /* supperdiagonal part */ 1153 ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,zz);CHKERRQ(ierr); 1154 PetscFunctionReturn(0); 1155 } 1156 1157 PetscErrorCode MatMultAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1158 { 1159 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1160 PetscErrorCode ierr; 1161 PetscInt mbs=a->mbs,bs=A->rmap->bs; 1162 PetscScalar *from,zero=0.0; 1163 const PetscScalar *x; 1164 1165 PetscFunctionBegin; 1166 /* diagonal part */ 1167 ierr = (*a->A->ops->multadd)(a->A,xx,yy,a->slvec1a);CHKERRQ(ierr); 1168 ierr = VecSet(a->slvec1b,zero);CHKERRQ(ierr); 1169 1170 /* subdiagonal part */ 1171 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr); 1172 1173 /* copy x into the vec slvec0 */ 1174 ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr); 1175 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1176 ierr = PetscArraycpy(from,x,bs*mbs);CHKERRQ(ierr); 1177 ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr); 1178 1179 ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1180 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1181 ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1182 1183 /* supperdiagonal part */ 1184 ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,zz);CHKERRQ(ierr); 1185 PetscFunctionReturn(0); 1186 } 1187 1188 /* 1189 This only works 
correctly for square matrices where the subblock A->A is the 1190 diagonal block 1191 */ 1192 PetscErrorCode MatGetDiagonal_MPISBAIJ(Mat A,Vec v) 1193 { 1194 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1195 PetscErrorCode ierr; 1196 1197 PetscFunctionBegin; 1198 /* PetscCheckFalse(a->rmap->N != a->cmap->N,PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */ 1199 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 1200 PetscFunctionReturn(0); 1201 } 1202 1203 PetscErrorCode MatScale_MPISBAIJ(Mat A,PetscScalar aa) 1204 { 1205 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1206 PetscErrorCode ierr; 1207 1208 PetscFunctionBegin; 1209 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 1210 ierr = MatScale(a->B,aa);CHKERRQ(ierr); 1211 PetscFunctionReturn(0); 1212 } 1213 1214 PetscErrorCode MatGetRow_MPISBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1215 { 1216 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data; 1217 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1218 PetscErrorCode ierr; 1219 PetscInt bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB; 1220 PetscInt nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend; 1221 PetscInt *cmap,*idx_p,cstart = mat->rstartbs; 1222 1223 PetscFunctionBegin; 1224 PetscCheckFalse(mat->getrowactive,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1225 mat->getrowactive = PETSC_TRUE; 1226 1227 if (!mat->rowvalues && (idx || v)) { 1228 /* 1229 allocate enough space to hold information from the longest row. 
1230 */ 1231 Mat_SeqSBAIJ *Aa = (Mat_SeqSBAIJ*)mat->A->data; 1232 Mat_SeqBAIJ *Ba = (Mat_SeqBAIJ*)mat->B->data; 1233 PetscInt max = 1,mbs = mat->mbs,tmp; 1234 for (i=0; i<mbs; i++) { 1235 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; /* row length */ 1236 if (max < tmp) max = tmp; 1237 } 1238 ierr = PetscMalloc2(max*bs2,&mat->rowvalues,max*bs2,&mat->rowindices);CHKERRQ(ierr); 1239 } 1240 1241 PetscCheckFalse(row < brstart || row >= brend,PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows"); 1242 lrow = row - brstart; /* local row index */ 1243 1244 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1245 if (!v) {pvA = NULL; pvB = NULL;} 1246 if (!idx) {pcA = NULL; if (!v) pcB = NULL;} 1247 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1248 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1249 nztot = nzA + nzB; 1250 1251 cmap = mat->garray; 1252 if (v || idx) { 1253 if (nztot) { 1254 /* Sort by increasing column numbers, assuming A and B already sorted */ 1255 PetscInt imark = -1; 1256 if (v) { 1257 *v = v_p = mat->rowvalues; 1258 for (i=0; i<nzB; i++) { 1259 if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i]; 1260 else break; 1261 } 1262 imark = i; 1263 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1264 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1265 } 1266 if (idx) { 1267 *idx = idx_p = mat->rowindices; 1268 if (imark > -1) { 1269 for (i=0; i<imark; i++) { 1270 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs; 1271 } 1272 } else { 1273 for (i=0; i<nzB; i++) { 1274 if (cmap[cworkB[i]/bs] < cstart) idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs; 1275 else break; 1276 } 1277 imark = i; 1278 } 1279 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart*bs + cworkA[i]; 1280 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1281 } 1282 } else { 1283 if (idx) *idx = NULL; 1284 if (v) *v = NULL; 1285 } 1286 } 1287 *nz = nztot; 1288 ierr = 
(*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1289 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1290 PetscFunctionReturn(0); 1291 } 1292 1293 PetscErrorCode MatRestoreRow_MPISBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1294 { 1295 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 1296 1297 PetscFunctionBegin; 1298 PetscCheckFalse(!baij->getrowactive,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1299 baij->getrowactive = PETSC_FALSE; 1300 PetscFunctionReturn(0); 1301 } 1302 1303 PetscErrorCode MatGetRowUpperTriangular_MPISBAIJ(Mat A) 1304 { 1305 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1306 Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data; 1307 1308 PetscFunctionBegin; 1309 aA->getrow_utriangular = PETSC_TRUE; 1310 PetscFunctionReturn(0); 1311 } 1312 PetscErrorCode MatRestoreRowUpperTriangular_MPISBAIJ(Mat A) 1313 { 1314 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1315 Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data; 1316 1317 PetscFunctionBegin; 1318 aA->getrow_utriangular = PETSC_FALSE; 1319 PetscFunctionReturn(0); 1320 } 1321 1322 PetscErrorCode MatConjugate_MPISBAIJ(Mat mat) 1323 { 1324 #if defined(PETSC_USE_COMPLEX) 1325 PetscErrorCode ierr; 1326 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)mat->data; 1327 1328 PetscFunctionBegin; 1329 ierr = MatConjugate(a->A);CHKERRQ(ierr); 1330 ierr = MatConjugate(a->B);CHKERRQ(ierr); 1331 #else 1332 PetscFunctionBegin; 1333 #endif 1334 PetscFunctionReturn(0); 1335 } 1336 1337 PetscErrorCode MatRealPart_MPISBAIJ(Mat A) 1338 { 1339 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1340 PetscErrorCode ierr; 1341 1342 PetscFunctionBegin; 1343 ierr = MatRealPart(a->A);CHKERRQ(ierr); 1344 ierr = MatRealPart(a->B);CHKERRQ(ierr); 1345 PetscFunctionReturn(0); 1346 } 1347 1348 PetscErrorCode MatImaginaryPart_MPISBAIJ(Mat A) 1349 { 1350 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1351 PetscErrorCode ierr; 1352 1353 PetscFunctionBegin; 1354 ierr = 
MatImaginaryPart(a->A);CHKERRQ(ierr); 1355 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 1356 PetscFunctionReturn(0); 1357 } 1358 1359 /* Check if isrow is a subset of iscol_local, called by MatCreateSubMatrix_MPISBAIJ() 1360 Input: isrow - distributed(parallel), 1361 iscol_local - locally owned (seq) 1362 */ 1363 PetscErrorCode ISEqual_private(IS isrow,IS iscol_local,PetscBool *flg) 1364 { 1365 PetscErrorCode ierr; 1366 PetscInt sz1,sz2,*a1,*a2,i,j,k,nmatch; 1367 const PetscInt *ptr1,*ptr2; 1368 1369 PetscFunctionBegin; 1370 ierr = ISGetLocalSize(isrow,&sz1);CHKERRQ(ierr); 1371 ierr = ISGetLocalSize(iscol_local,&sz2);CHKERRQ(ierr); 1372 if (sz1 > sz2) { 1373 *flg = PETSC_FALSE; 1374 PetscFunctionReturn(0); 1375 } 1376 1377 ierr = ISGetIndices(isrow,&ptr1);CHKERRQ(ierr); 1378 ierr = ISGetIndices(iscol_local,&ptr2);CHKERRQ(ierr); 1379 1380 ierr = PetscMalloc1(sz1,&a1);CHKERRQ(ierr); 1381 ierr = PetscMalloc1(sz2,&a2);CHKERRQ(ierr); 1382 ierr = PetscArraycpy(a1,ptr1,sz1);CHKERRQ(ierr); 1383 ierr = PetscArraycpy(a2,ptr2,sz2);CHKERRQ(ierr); 1384 ierr = PetscSortInt(sz1,a1);CHKERRQ(ierr); 1385 ierr = PetscSortInt(sz2,a2);CHKERRQ(ierr); 1386 1387 nmatch=0; 1388 k = 0; 1389 for (i=0; i<sz1; i++) { 1390 for (j=k; j<sz2; j++) { 1391 if (a1[i] == a2[j]) { 1392 k = j; nmatch++; 1393 break; 1394 } 1395 } 1396 } 1397 ierr = ISRestoreIndices(isrow,&ptr1);CHKERRQ(ierr); 1398 ierr = ISRestoreIndices(iscol_local,&ptr2);CHKERRQ(ierr); 1399 ierr = PetscFree(a1);CHKERRQ(ierr); 1400 ierr = PetscFree(a2);CHKERRQ(ierr); 1401 if (nmatch < sz1) { 1402 *flg = PETSC_FALSE; 1403 } else { 1404 *flg = PETSC_TRUE; 1405 } 1406 PetscFunctionReturn(0); 1407 } 1408 1409 PetscErrorCode MatCreateSubMatrix_MPISBAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 1410 { 1411 PetscErrorCode ierr; 1412 IS iscol_local; 1413 PetscInt csize; 1414 PetscBool isequal; 1415 1416 PetscFunctionBegin; 1417 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 1418 if (call == MAT_REUSE_MATRIX) { 1419 ierr = 
PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 1420 PetscCheckFalse(!iscol_local,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 1421 } else { 1422 PetscBool issorted; 1423 1424 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 1425 ierr = ISEqual_private(isrow,iscol_local,&isequal);CHKERRQ(ierr); 1426 ierr = ISSorted(iscol_local, &issorted);CHKERRQ(ierr); 1427 PetscCheckFalse(!isequal || !issorted,PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"For symmetric format, iscol must equal isrow and be sorted"); 1428 } 1429 1430 /* now call MatCreateSubMatrix_MPIBAIJ() */ 1431 ierr = MatCreateSubMatrix_MPIBAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 1432 if (call == MAT_INITIAL_MATRIX) { 1433 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 1434 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 1435 } 1436 PetscFunctionReturn(0); 1437 } 1438 1439 PetscErrorCode MatZeroEntries_MPISBAIJ(Mat A) 1440 { 1441 Mat_MPISBAIJ *l = (Mat_MPISBAIJ*)A->data; 1442 PetscErrorCode ierr; 1443 1444 PetscFunctionBegin; 1445 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 1446 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 1447 PetscFunctionReturn(0); 1448 } 1449 1450 PetscErrorCode MatGetInfo_MPISBAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1451 { 1452 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)matin->data; 1453 Mat A = a->A,B = a->B; 1454 PetscErrorCode ierr; 1455 PetscLogDouble isend[5],irecv[5]; 1456 1457 PetscFunctionBegin; 1458 info->block_size = (PetscReal)matin->rmap->bs; 1459 1460 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1461 1462 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1463 isend[3] = info->memory; isend[4] = info->mallocs; 1464 1465 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1466 1467 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += 
info->nz_unneeded; 1468 isend[3] += info->memory; isend[4] += info->mallocs; 1469 if (flag == MAT_LOCAL) { 1470 info->nz_used = isend[0]; 1471 info->nz_allocated = isend[1]; 1472 info->nz_unneeded = isend[2]; 1473 info->memory = isend[3]; 1474 info->mallocs = isend[4]; 1475 } else if (flag == MAT_GLOBAL_MAX) { 1476 ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_MAX,PetscObjectComm((PetscObject)matin));CHKERRMPI(ierr); 1477 1478 info->nz_used = irecv[0]; 1479 info->nz_allocated = irecv[1]; 1480 info->nz_unneeded = irecv[2]; 1481 info->memory = irecv[3]; 1482 info->mallocs = irecv[4]; 1483 } else if (flag == MAT_GLOBAL_SUM) { 1484 ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_SUM,PetscObjectComm((PetscObject)matin));CHKERRMPI(ierr); 1485 1486 info->nz_used = irecv[0]; 1487 info->nz_allocated = irecv[1]; 1488 info->nz_unneeded = irecv[2]; 1489 info->memory = irecv[3]; 1490 info->mallocs = irecv[4]; 1491 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag); 1492 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1493 info->fill_ratio_needed = 0; 1494 info->factor_mallocs = 0; 1495 PetscFunctionReturn(0); 1496 } 1497 1498 PetscErrorCode MatSetOption_MPISBAIJ(Mat A,MatOption op,PetscBool flg) 1499 { 1500 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1501 Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data; 1502 PetscErrorCode ierr; 1503 1504 PetscFunctionBegin; 1505 switch (op) { 1506 case MAT_NEW_NONZERO_LOCATIONS: 1507 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1508 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1509 case MAT_KEEP_NONZERO_PATTERN: 1510 case MAT_SUBMAT_SINGLEIS: 1511 case MAT_NEW_NONZERO_LOCATION_ERR: 1512 MatCheckPreallocated(A,1); 1513 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1514 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1515 break; 1516 case MAT_ROW_ORIENTED: 1517 MatCheckPreallocated(A,1); 1518 a->roworiented = flg; 1519 1520 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1521 
ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1522 break; 1523 case MAT_FORCE_DIAGONAL_ENTRIES: 1524 case MAT_SORTED_FULL: 1525 ierr = PetscInfo(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1526 break; 1527 case MAT_IGNORE_OFF_PROC_ENTRIES: 1528 a->donotstash = flg; 1529 break; 1530 case MAT_USE_HASH_TABLE: 1531 a->ht_flag = flg; 1532 break; 1533 case MAT_HERMITIAN: 1534 MatCheckPreallocated(A,1); 1535 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1536 #if defined(PETSC_USE_COMPLEX) 1537 if (flg) { /* need different mat-vec ops */ 1538 A->ops->mult = MatMult_MPISBAIJ_Hermitian; 1539 A->ops->multadd = MatMultAdd_MPISBAIJ_Hermitian; 1540 A->ops->multtranspose = NULL; 1541 A->ops->multtransposeadd = NULL; 1542 A->symmetric = PETSC_FALSE; 1543 } 1544 #endif 1545 break; 1546 case MAT_SPD: 1547 case MAT_SYMMETRIC: 1548 MatCheckPreallocated(A,1); 1549 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1550 #if defined(PETSC_USE_COMPLEX) 1551 if (flg) { /* restore to use default mat-vec ops */ 1552 A->ops->mult = MatMult_MPISBAIJ; 1553 A->ops->multadd = MatMultAdd_MPISBAIJ; 1554 A->ops->multtranspose = MatMult_MPISBAIJ; 1555 A->ops->multtransposeadd = MatMultAdd_MPISBAIJ; 1556 } 1557 #endif 1558 break; 1559 case MAT_STRUCTURALLY_SYMMETRIC: 1560 MatCheckPreallocated(A,1); 1561 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1562 break; 1563 case MAT_SYMMETRY_ETERNAL: 1564 PetscCheckFalse(!flg,PETSC_COMM_SELF,PETSC_ERR_SUP,"Matrix must be symmetric"); 1565 ierr = PetscInfo(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1566 break; 1567 case MAT_IGNORE_LOWER_TRIANGULAR: 1568 aA->ignore_ltriangular = flg; 1569 break; 1570 case MAT_ERROR_LOWER_TRIANGULAR: 1571 aA->ignore_ltriangular = flg; 1572 break; 1573 case MAT_GETROW_UPPERTRIANGULAR: 1574 aA->getrow_utriangular = flg; 1575 break; 1576 default: 1577 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1578 } 1579 PetscFunctionReturn(0); 1580 } 1581 1582 PetscErrorCode MatTranspose_MPISBAIJ(Mat 
A,MatReuse reuse,Mat *B) 1583 { 1584 PetscErrorCode ierr; 1585 1586 PetscFunctionBegin; 1587 if (reuse == MAT_INITIAL_MATRIX) { 1588 ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr); 1589 } else if (reuse == MAT_REUSE_MATRIX) { 1590 ierr = MatCopy(A,*B,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 1591 } 1592 PetscFunctionReturn(0); 1593 } 1594 1595 PetscErrorCode MatDiagonalScale_MPISBAIJ(Mat mat,Vec ll,Vec rr) 1596 { 1597 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 1598 Mat a = baij->A, b=baij->B; 1599 PetscErrorCode ierr; 1600 PetscInt nv,m,n; 1601 PetscBool flg; 1602 1603 PetscFunctionBegin; 1604 if (ll != rr) { 1605 ierr = VecEqual(ll,rr,&flg);CHKERRQ(ierr); 1606 PetscCheckFalse(!flg,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be same"); 1607 } 1608 if (!ll) PetscFunctionReturn(0); 1609 1610 ierr = MatGetLocalSize(mat,&m,&n);CHKERRQ(ierr); 1611 PetscCheckFalse(m != n,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"For symmetric format, local size %" PetscInt_FMT " %" PetscInt_FMT " must be same",m,n); 1612 1613 ierr = VecGetLocalSize(rr,&nv);CHKERRQ(ierr); 1614 PetscCheckFalse(nv!=n,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left and right vector non-conforming local size"); 1615 1616 ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1617 1618 /* left diagonalscale the off-diagonal part */ 1619 ierr = (*b->ops->diagonalscale)(b,ll,NULL);CHKERRQ(ierr); 1620 1621 /* scale the diagonal part */ 1622 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1623 1624 /* right diagonalscale the off-diagonal part */ 1625 ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1626 ierr = (*b->ops->diagonalscale)(b,NULL,baij->lvec);CHKERRQ(ierr); 1627 PetscFunctionReturn(0); 1628 } 1629 1630 PetscErrorCode MatSetUnfactored_MPISBAIJ(Mat A) 1631 { 1632 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1633 PetscErrorCode ierr; 1634 1635 PetscFunctionBegin; 1636 
ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1637 PetscFunctionReturn(0); 1638 } 1639 1640 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat,MatDuplicateOption,Mat*); 1641 1642 PetscErrorCode MatEqual_MPISBAIJ(Mat A,Mat B,PetscBool *flag) 1643 { 1644 Mat_MPISBAIJ *matB = (Mat_MPISBAIJ*)B->data,*matA = (Mat_MPISBAIJ*)A->data; 1645 Mat a,b,c,d; 1646 PetscBool flg; 1647 PetscErrorCode ierr; 1648 1649 PetscFunctionBegin; 1650 a = matA->A; b = matA->B; 1651 c = matB->A; d = matB->B; 1652 1653 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1654 if (flg) { 1655 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1656 } 1657 ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRMPI(ierr); 1658 PetscFunctionReturn(0); 1659 } 1660 1661 PetscErrorCode MatCopy_MPISBAIJ(Mat A,Mat B,MatStructure str) 1662 { 1663 PetscErrorCode ierr; 1664 PetscBool isbaij; 1665 1666 PetscFunctionBegin; 1667 ierr = PetscObjectTypeCompareAny((PetscObject)B,&isbaij,MATSEQSBAIJ,MATMPISBAIJ,"");CHKERRQ(ierr); 1668 PetscCheckFalse(!isbaij,PetscObjectComm((PetscObject)B),PETSC_ERR_SUP,"Not for matrix type %s",((PetscObject)B)->type_name); 1669 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. 
*/ 1670 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 1671 ierr = MatGetRowUpperTriangular(A);CHKERRQ(ierr); 1672 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1673 ierr = MatRestoreRowUpperTriangular(A);CHKERRQ(ierr); 1674 } else { 1675 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1676 Mat_MPISBAIJ *b = (Mat_MPISBAIJ*)B->data; 1677 1678 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 1679 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 1680 } 1681 ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr); 1682 PetscFunctionReturn(0); 1683 } 1684 1685 PetscErrorCode MatSetUp_MPISBAIJ(Mat A) 1686 { 1687 PetscErrorCode ierr; 1688 1689 PetscFunctionBegin; 1690 ierr = MatMPISBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,NULL,PETSC_DEFAULT,NULL);CHKERRQ(ierr); 1691 PetscFunctionReturn(0); 1692 } 1693 1694 PetscErrorCode MatAXPY_MPISBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 1695 { 1696 PetscErrorCode ierr; 1697 Mat_MPISBAIJ *xx=(Mat_MPISBAIJ*)X->data,*yy=(Mat_MPISBAIJ*)Y->data; 1698 PetscBLASInt bnz,one=1; 1699 Mat_SeqSBAIJ *xa,*ya; 1700 Mat_SeqBAIJ *xb,*yb; 1701 1702 PetscFunctionBegin; 1703 if (str == SAME_NONZERO_PATTERN) { 1704 PetscScalar alpha = a; 1705 xa = (Mat_SeqSBAIJ*)xx->A->data; 1706 ya = (Mat_SeqSBAIJ*)yy->A->data; 1707 ierr = PetscBLASIntCast(xa->nz,&bnz);CHKERRQ(ierr); 1708 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,xa->a,&one,ya->a,&one)); 1709 xb = (Mat_SeqBAIJ*)xx->B->data; 1710 yb = (Mat_SeqBAIJ*)yy->B->data; 1711 ierr = PetscBLASIntCast(xb->nz,&bnz);CHKERRQ(ierr); 1712 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,xb->a,&one,yb->a,&one)); 1713 ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr); 1714 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 1715 ierr = MatSetOption(X,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE);CHKERRQ(ierr); 1716 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 1717 ierr = MatSetOption(X,MAT_GETROW_UPPERTRIANGULAR,PETSC_FALSE);CHKERRQ(ierr); 1718 
} else { 1719 Mat B; 1720 PetscInt *nnz_d,*nnz_o,bs=Y->rmap->bs; 1721 PetscCheckFalse(bs != X->rmap->bs,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrices must have same block size"); 1722 ierr = MatGetRowUpperTriangular(X);CHKERRQ(ierr); 1723 ierr = MatGetRowUpperTriangular(Y);CHKERRQ(ierr); 1724 ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr); 1725 ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr); 1726 ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr); 1727 ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 1728 ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr); 1729 ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr); 1730 ierr = MatSetType(B,MATMPISBAIJ);CHKERRQ(ierr); 1731 ierr = MatAXPYGetPreallocation_SeqSBAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr); 1732 ierr = MatAXPYGetPreallocation_MPIBAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr); 1733 ierr = MatMPISBAIJSetPreallocation(B,bs,0,nnz_d,0,nnz_o);CHKERRQ(ierr); 1734 ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 1735 ierr = MatHeaderMerge(Y,&B);CHKERRQ(ierr); 1736 ierr = PetscFree(nnz_d);CHKERRQ(ierr); 1737 ierr = PetscFree(nnz_o);CHKERRQ(ierr); 1738 ierr = MatRestoreRowUpperTriangular(X);CHKERRQ(ierr); 1739 ierr = MatRestoreRowUpperTriangular(Y);CHKERRQ(ierr); 1740 } 1741 PetscFunctionReturn(0); 1742 } 1743 1744 PetscErrorCode MatCreateSubMatrices_MPISBAIJ(Mat A,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat *B[]) 1745 { 1746 PetscErrorCode ierr; 1747 PetscInt i; 1748 PetscBool flg; 1749 1750 PetscFunctionBegin; 1751 ierr = MatCreateSubMatrices_MPIBAIJ(A,n,irow,icol,scall,B);CHKERRQ(ierr); /* B[] are sbaij matrices */ 1752 for (i=0; i<n; i++) { 1753 ierr = ISEqual(irow[i],icol[i],&flg);CHKERRQ(ierr); 1754 if (!flg) { 1755 ierr = MatSeqSBAIJZeroOps_Private(*B[i]);CHKERRQ(ierr); 1756 } 1757 } 1758 PetscFunctionReturn(0); 1759 } 1760 1761 PetscErrorCode MatShift_MPISBAIJ(Mat 
Y,PetscScalar a)
{
  PetscErrorCode ierr;
  Mat_MPISBAIJ   *maij = (Mat_MPISBAIJ*)Y->data;
  Mat_SeqSBAIJ   *aij  = (Mat_SeqSBAIJ*)maij->A->data;

  PetscFunctionBegin;
  if (!Y->preallocated) {
    /* not yet preallocated: reserve one diagonal block per block row before shifting */
    ierr = MatMPISBAIJSetPreallocation(Y,Y->rmap->bs,1,NULL,0,NULL);CHKERRQ(ierr);
  } else if (!aij->nz) {
    /* preallocated but diagonal part empty: re-preallocate it, preserving the nonew flag
       that MatSeqSBAIJSetPreallocation would otherwise reset */
    PetscInt nonew = aij->nonew;
    ierr = MatSeqSBAIJSetPreallocation(maij->A,Y->rmap->bs,1,NULL);CHKERRQ(ierr);
    aij->nonew = nonew;
  }
  ierr = MatShift_Basic(Y,a);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Reports whether the (square) matrix is missing a diagonal entry; *d is translated
   from the local block row returned by the diagonal part into a global block row */
PetscErrorCode MatMissingDiagonal_MPISBAIJ(Mat A,PetscBool *missing,PetscInt *d)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscCheckFalse(A->rmap->n != A->cmap->n,PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices");
  ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr);
  if (d) {
    PetscInt rstart;
    ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
    *d  += rstart/A->rmap->bs; /* shift local block-row index to a global one */
  }
  PetscFunctionReturn(0);
}

/* Returns (no copy, no reference increment) the sequential matrix holding this
   process's diagonal block */
PetscErrorCode MatGetDiagonalBlock_MPISBAIJ(Mat A,Mat *a)
{
  PetscFunctionBegin;
  *a = ((Mat_MPISBAIJ*)A->data)->A;
  PetscFunctionReturn(0);
}

/* -------------------------------------------------------------------*/
/* Function-pointer table for MATMPISBAIJ; the slot numbers in the comments follow
   the _MatOps struct layout, so entry order must not change. */
static struct _MatOps MatOps_Values = {MatSetValues_MPISBAIJ,
                                       MatGetRow_MPISBAIJ,
                                       MatRestoreRow_MPISBAIJ,
                                       MatMult_MPISBAIJ,
                               /* 4*/  MatMultAdd_MPISBAIJ,
                                       MatMult_MPISBAIJ, /* transpose versions are same as non-transpose */
                                       MatMultAdd_MPISBAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                               /* 10*/ NULL,
                                       NULL,
                                       NULL,
                                       MatSOR_MPISBAIJ,
                                       MatTranspose_MPISBAIJ,
                               /* 15*/ MatGetInfo_MPISBAIJ,
                                       MatEqual_MPISBAIJ,
                                       MatGetDiagonal_MPISBAIJ,
                                       MatDiagonalScale_MPISBAIJ,
                                       MatNorm_MPISBAIJ,
                               /* 20*/ MatAssemblyBegin_MPISBAIJ,
                                       MatAssemblyEnd_MPISBAIJ,
                                       MatSetOption_MPISBAIJ,
                                       MatZeroEntries_MPISBAIJ,
                               /* 24*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                               /* 29*/ MatSetUp_MPISBAIJ,
                                       NULL,
                                       NULL,
                                       MatGetDiagonalBlock_MPISBAIJ,
                                       NULL,
                               /* 34*/ MatDuplicate_MPISBAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                               /* 39*/ MatAXPY_MPISBAIJ,
                                       MatCreateSubMatrices_MPISBAIJ,
                                       MatIncreaseOverlap_MPISBAIJ,
                                       MatGetValues_MPISBAIJ,
                                       MatCopy_MPISBAIJ,
                               /* 44*/ NULL,
                                       MatScale_MPISBAIJ,
                                       MatShift_MPISBAIJ,
                                       NULL,
                                       NULL,
                               /* 49*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                               /* 54*/ NULL,
                                       NULL,
                                       MatSetUnfactored_MPISBAIJ,
                                       NULL,
                                       MatSetValuesBlocked_MPISBAIJ,
                               /* 59*/ MatCreateSubMatrix_MPISBAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                               /* 64*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                               /* 69*/ MatGetRowMaxAbs_MPISBAIJ,
                                       NULL,
                                       MatConvert_MPISBAIJ_Basic,
                                       NULL,
                                       NULL,
                               /* 74*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                               /* 79*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatLoad_MPISBAIJ,
                               /* 84*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                               /* 89*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                               /* 94*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                               /* 99*/ NULL,
                                       NULL,
                                       NULL,
                                       MatConjugate_MPISBAIJ,
                                       NULL,
                               /*104*/ NULL,
                                       MatRealPart_MPISBAIJ,
                                       MatImaginaryPart_MPISBAIJ,
                                       MatGetRowUpperTriangular_MPISBAIJ,
                                       MatRestoreRowUpperTriangular_MPISBAIJ,
                               /*109*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatMissingDiagonal_MPISBAIJ,
                               /*114*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                               /*119*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                               /*124*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                               /*129*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                               /*134*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                               /*139*/ MatSetBlockSizes_Default,
                                       NULL,
                                       NULL,
NULL, 1947 NULL, 1948 /*144*/MatCreateMPIMatConcatenateSeqMat_MPISBAIJ, 1949 NULL, 1950 NULL, 1951 NULL 1952 }; 1953 1954 PetscErrorCode MatMPISBAIJSetPreallocation_MPISBAIJ(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt *d_nnz,PetscInt o_nz,const PetscInt *o_nnz) 1955 { 1956 Mat_MPISBAIJ *b = (Mat_MPISBAIJ*)B->data; 1957 PetscErrorCode ierr; 1958 PetscInt i,mbs,Mbs; 1959 PetscMPIInt size; 1960 1961 PetscFunctionBegin; 1962 ierr = MatSetBlockSize(B,PetscAbs(bs));CHKERRQ(ierr); 1963 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 1964 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 1965 ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr); 1966 PetscCheckFalse(B->rmap->N > B->cmap->N,PetscObjectComm((PetscObject)B),PETSC_ERR_SUP,"MPISBAIJ matrix cannot have more rows %" PetscInt_FMT " than columns %" PetscInt_FMT,B->rmap->N,B->cmap->N); 1967 PetscCheckFalse(B->rmap->n > B->cmap->n,PETSC_COMM_SELF,PETSC_ERR_SUP,"MPISBAIJ matrix cannot have more local rows %" PetscInt_FMT " than columns %" PetscInt_FMT,B->rmap->n,B->cmap->n); 1968 1969 mbs = B->rmap->n/bs; 1970 Mbs = B->rmap->N/bs; 1971 PetscCheckFalse(mbs*bs != B->rmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"No of local rows %" PetscInt_FMT " must be divisible by blocksize %" PetscInt_FMT,B->rmap->N,bs); 1972 1973 B->rmap->bs = bs; 1974 b->bs2 = bs*bs; 1975 b->mbs = mbs; 1976 b->Mbs = Mbs; 1977 b->nbs = B->cmap->n/bs; 1978 b->Nbs = B->cmap->N/bs; 1979 1980 for (i=0; i<=b->size; i++) { 1981 b->rangebs[i] = B->rmap->range[i]/bs; 1982 } 1983 b->rstartbs = B->rmap->rstart/bs; 1984 b->rendbs = B->rmap->rend/bs; 1985 1986 b->cstartbs = B->cmap->rstart/bs; 1987 b->cendbs = B->cmap->rend/bs; 1988 1989 #if defined(PETSC_USE_CTABLE) 1990 ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr); 1991 #else 1992 ierr = PetscFree(b->colmap);CHKERRQ(ierr); 1993 #endif 1994 ierr = PetscFree(b->garray);CHKERRQ(ierr); 1995 ierr = VecDestroy(&b->lvec);CHKERRQ(ierr); 1996 ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr); 1997 ierr = 
VecDestroy(&b->slvec0);CHKERRQ(ierr); 1998 ierr = VecDestroy(&b->slvec0b);CHKERRQ(ierr); 1999 ierr = VecDestroy(&b->slvec1);CHKERRQ(ierr); 2000 ierr = VecDestroy(&b->slvec1a);CHKERRQ(ierr); 2001 ierr = VecDestroy(&b->slvec1b);CHKERRQ(ierr); 2002 ierr = VecScatterDestroy(&b->sMvctx);CHKERRQ(ierr); 2003 2004 /* Because the B will have been resized we simply destroy it and create a new one each time */ 2005 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRMPI(ierr); 2006 ierr = MatDestroy(&b->B);CHKERRQ(ierr); 2007 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 2008 ierr = MatSetSizes(b->B,B->rmap->n,size > 1 ? B->cmap->N : 0,B->rmap->n,size > 1 ? B->cmap->N : 0);CHKERRQ(ierr); 2009 ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr); 2010 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr); 2011 2012 if (!B->preallocated) { 2013 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 2014 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 2015 ierr = MatSetType(b->A,MATSEQSBAIJ);CHKERRQ(ierr); 2016 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr); 2017 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),bs,&B->bstash);CHKERRQ(ierr); 2018 } 2019 2020 ierr = MatSeqSBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr); 2021 ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr); 2022 2023 B->preallocated = PETSC_TRUE; 2024 B->was_assembled = PETSC_FALSE; 2025 B->assembled = PETSC_FALSE; 2026 PetscFunctionReturn(0); 2027 } 2028 2029 PetscErrorCode MatMPISBAIJSetPreallocationCSR_MPISBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[]) 2030 { 2031 PetscInt m,rstart,cend; 2032 PetscInt i,j,d,nz,bd, nz_max=0,*d_nnz=NULL,*o_nnz=NULL; 2033 const PetscInt *JJ =NULL; 2034 PetscScalar *values=NULL; 2035 PetscBool roworiented = ((Mat_MPISBAIJ*)B->data)->roworiented; 2036 PetscErrorCode ierr; 2037 PetscBool 
nooffprocentries;

  PetscFunctionBegin;
  PetscCheckFalse(bs < 1,PetscObjectComm((PetscObject)B),PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %" PetscInt_FMT,bs);
  ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
  ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
  ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
  m      = B->rmap->n/bs;      /* local number of block rows */
  rstart = B->rmap->rstart/bs; /* first global block row owned by this process */
  cend   = B->cmap->rend/bs;   /* one past the last block column of the diagonal part */

  PetscCheckFalse(ii[0],PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %" PetscInt_FMT,ii[0]);
  ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr);
  for (i=0; i<m; i++) {
    nz = ii[i+1] - ii[i];
    PetscCheckFalse(nz < 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %" PetscInt_FMT " has a negative number of columns %" PetscInt_FMT,i,nz);
    /* count the ones on the diagonal and above, split into diagonal and off diagonal portions. */
    JJ = jj + ii[i];
    bd = 0;
    /* bd counts entries strictly below the diagonal; SBAIJ storage ignores them.
       NOTE(review): assumes column indices in each row are sorted ascending — confirm with callers. */
    for (j=0; j<nz; j++) {
      if (*JJ >= i + rstart) break;
      JJ++;
      bd++;
    }
    d = 0;
    /* d counts upper-triangular entries that land in the diagonal block (col < cend) */
    for (; j<nz; j++) {
      if (*JJ++ >= cend) break;
      d++;
    }
    d_nnz[i] = d;
    o_nnz[i] = nz - d - bd;
    nz       = nz - bd;          /* entries actually stored for this row */
    nz_max   = PetscMax(nz_max,nz);
  }
  ierr = MatMPISBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
  ierr = MatSetOption(B,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);
  ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);

  values = (PetscScalar*)V;
  if (!values) {
    /* no values given: insert zeros so the nonzero pattern is still established */
    ierr = PetscCalloc1(bs*bs*nz_max,&values);CHKERRQ(ierr);
  }
  for (i=0; i<m; i++) {
    PetscInt          row   = i + rstart;
    PetscInt          ncols = ii[i+1] - ii[i];
    const PetscInt    *icols = jj + ii[i];
    if (bs == 1 || !roworiented) { /* block ordering matches the non-nested layout of MatSetValues so we can insert entire rows */
      const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
      ierr = MatSetValuesBlocked_MPISBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr);
    } else { /* block ordering does not match so we can only insert one block at a time. */
      PetscInt j;
      for (j=0; j<ncols; j++) {
        const PetscScalar *svals = values + (V ? (bs*bs*(ii[i]+j)) : 0);
        ierr = MatSetValuesBlocked_MPISBAIJ(B,1,&row,1,&icols[j],svals,INSERT_VALUES);CHKERRQ(ierr);
      }
    }
  }

  if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); }
  /* every entry inserted above is local, so the off-process stash exchange can be skipped */
  nooffprocentries    = B->nooffprocentries;
  B->nooffprocentries = PETSC_TRUE;
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  B->nooffprocentries = nooffprocentries;

  ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*MC
   MATMPISBAIJ - MATMPISBAIJ = "mpisbaij" - A matrix type to be used for distributed symmetric sparse block matrices,
   based on block compressed sparse row format.  Only the upper triangular portion of the "diagonal" portion of
   the matrix is stored.

   For complex numbers by default this matrix is symmetric, NOT Hermitian symmetric. To make it Hermitian symmetric you
   can call MatSetOption(Mat, MAT_HERMITIAN);

   Options Database Keys:
. -mat_type mpisbaij - sets the matrix type to "mpisbaij" during a call to MatSetFromOptions()

   Notes:
     The number of rows in the matrix must be less than or equal to the number of columns. Similarly the number of rows in the
     diagonal portion of the matrix of each process has to be less than or equal the number of columns.

   Level: beginner

.seealso: MatCreateBAIJ(), MATSEQSBAIJ, MatType
M*/

/* Constructor registered for MATMPISBAIJ: installs the ops table, initializes all
   type-specific fields to an empty state, and composes the by-name methods */
PETSC_EXTERN PetscErrorCode MatCreate_MPISBAIJ(Mat B)
{
  Mat_MPISBAIJ   *b;
  PetscErrorCode ierr;
  PetscBool      flg = PETSC_FALSE;

  PetscFunctionBegin;
  ierr    = PetscNewLog(B,&b);CHKERRQ(ierr);
  B->data = (void*)b;
  ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);

  B->ops->destroy = MatDestroy_MPISBAIJ;
  B->ops->view    = MatView_MPISBAIJ;
  B->assembled    = PETSC_FALSE;
  B->insertmode   = NOT_SET_VALUES;

  ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRMPI(ierr);
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&b->size);CHKERRMPI(ierr);

  /* build local table of row and column ownerships */
  ierr = PetscMalloc1(b->size+2,&b->rangebs);CHKERRQ(ierr);

  /* build cache for off array entries formed */
  ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);

  b->donotstash  = PETSC_FALSE;
  b->colmap      = NULL;
  b->garray      = NULL;
  b->roworiented = PETSC_TRUE;

  /* stuff used in block assembly */
  b->barray = NULL;

  /* stuff used for matrix vector multiply */
  b->lvec    = NULL;
  b->Mvctx   = NULL;
  b->slvec0  = NULL;
  b->slvec0b = NULL;
  b->slvec1  = NULL;
  b->slvec1a = NULL;
  b->slvec1b = NULL;
  b->sMvctx  = NULL;

  /* stuff for MatGetRow() */
  b->rowindices   = NULL;
  b->rowvalues    = NULL;
  b->getrowactive = PETSC_FALSE;

  /* hash table stuff */
  b->ht           = NULL;
  b->hd           = NULL;
  b->ht_size      = 0;
  b->ht_flag      = PETSC_FALSE;
  b->ht_fact      = 0;
  b->ht_total_ct  = 0;
  b->ht_insert_ct = 0;

  /* stuff for MatCreateSubMatrices_MPIBAIJ_local() */
  b->ijonly = PETSC_FALSE;

  b->in_loc = NULL;
  b->v_loc  = NULL;
  b->n_loc  = 0;

  /* compose the type-specific methods and conversions that are looked up by name */
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPISBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPISBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPISBAIJSetPreallocation_C",MatMPISBAIJSetPreallocation_MPISBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPISBAIJSetPreallocationCSR_C",MatMPISBAIJSetPreallocationCSR_MPISBAIJ);CHKERRQ(ierr);
#if defined(PETSC_HAVE_ELEMENTAL)
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpisbaij_elemental_C",MatConvert_MPISBAIJ_Elemental);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_SCALAPACK)
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpisbaij_scalapack_C",MatConvert_SBAIJ_ScaLAPACK);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpisbaij_mpiaij_C",MatConvert_MPISBAIJ_Basic);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpisbaij_mpibaij_C",MatConvert_MPISBAIJ_Basic);CHKERRQ(ierr);

  /* SBAIJ is symmetric by construction; Hermitian only in the real-scalar case */
  B->symmetric                  = PETSC_TRUE;
  B->structurally_symmetric     = PETSC_TRUE;
  B->symmetric_set              = PETSC_TRUE;
  B->structurally_symmetric_set = PETSC_TRUE;
  B->symmetric_eternal          = PETSC_TRUE;
#if defined(PETSC_USE_COMPLEX)
  B->hermitian     = PETSC_FALSE;
  B->hermitian_set = PETSC_FALSE;
#else
  B->hermitian     = PETSC_TRUE;
  B->hermitian_set = PETSC_TRUE;
#endif

  ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPISBAIJ);CHKERRQ(ierr);
  ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Options for loading MPISBAIJ matrix 1","Mat");CHKERRQ(ierr);
  ierr = PetscOptionsBool("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",flg,&flg,NULL);CHKERRQ(ierr);
  if (flg) {
    PetscReal fact = 1.39;
    ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr);
    ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,NULL);CHKERRQ(ierr);
    if (fact <= 1.0) fact = 1.39; /* reject factors <= 1, which would undersize the table */
    ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
    ierr = PetscInfo(B,"Hash table Factor used %5.2g\n",(double)fact);CHKERRQ(ierr);
  }
  ierr = PetscOptionsEnd();CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*MC
   MATSBAIJ - MATSBAIJ = "sbaij" - A matrix type to be used for symmetric block sparse matrices.

   This matrix type is identical to MATSEQSBAIJ when constructed with a single process communicator,
   and MATMPISBAIJ otherwise.

   Options Database Keys:
. -mat_type sbaij - sets the matrix type to "sbaij" during a call to MatSetFromOptions()

   Level: beginner

.seealso: MatCreateSBAIJ, MATSEQSBAIJ, MATMPISBAIJ
M*/

/*@C
   MatMPISBAIJSetPreallocation - For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective on Mat

   Input Parameters:
+  B - the matrix
.  bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
        blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
.  d_nz - number of block nonzeros per block row in diagonal portion of local
          submatrix (same for all local rows)
.  d_nnz - array containing the number of block nonzeros in the various block rows
           in the upper triangular and diagonal part of the diagonal portion of the local submatrix
           (possibly different for each block row) or NULL.
           If you plan to factor the matrix you must leave room
           for the diagonal entry and set a value even if it is zero.
.  o_nz - number of block nonzeros per block row in the off-diagonal portion of local
          submatrix (same for all local rows).
-  o_nnz - array containing the number of nonzeros in the various block rows of the
           off-diagonal portion of the local submatrix that is right of the diagonal
           (possibly different for each block row) or NULL.

   Options Database Keys:
+   -mat_no_unroll - uses code that does not unroll the loops in the
                     block calculations (much slower)
-   -mat_block_size - size of the blocks to use

   Notes:

   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
   then it must be used on all processors that share the object for that argument.

   If the *_nnz parameter is given then the *_nz parameter is ignored

   Storage Information:
   For a square global matrix we define each processor's diagonal portion
   to be its local rows and the corresponding columns (a square submatrix);
   each processor's off-diagonal portion encompasses the remainder of the
   local matrix (a rectangular submatrix).

   The user can specify preallocated storage for the diagonal part of
   the local submatrix with either d_nz or d_nnz (not both). Set
   d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
   memory allocation. Likewise, specify preallocated storage for the
   off-diagonal part of the local submatrix with o_nz or o_nnz (not both).

   You can call MatGetInfo() to get information on how effective the preallocation was;
   for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
   You can also run with the option -info and look for messages with the string
   malloc in them to see if additional memory allocation was needed.

   Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
   the figure below we depict these three local rows and all columns (0-11).

.vb
           0 1 2 3 4 5 6 7 8 9 10 11
          --------------------------
   row 3  |. . . d d d o o o o  o  o
   row 4  |. . . d d d o o o o  o  o
   row 5  |. . . d d d o o o o  o  o
          --------------------------
.ve

   Thus, any entries in the d locations are stored in the d (diagonal)
   submatrix, and any entries in the o locations are stored in the
   o (off-diagonal) submatrix. Note that the d matrix is stored in
   MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format.

   Now d_nz should indicate the number of block nonzeros per row in the upper triangular
   plus the diagonal part of the d matrix,
   and o_nz should indicate the number of block nonzeros per row in the o matrix

   In general, for PDE problems in which most nonzeros are near the diagonal,
   one expects d_nz >> o_nz. For large problems you MUST preallocate memory
   or you will get TERRIBLE performance; see the users' manual chapter on
   matrices.

   Level: intermediate

.seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateBAIJ(), PetscSplitOwnership()
@*/
PetscErrorCode MatMPISBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(B,MAT_CLASSID,1);
  PetscValidType(B,1);
  PetscValidLogicalCollectiveInt(B,bs,2);
  /* dispatches to MatMPISBAIJSetPreallocation_MPISBAIJ when B is MATMPISBAIJ; a no-op for other types */
  ierr = PetscTryMethod(B,"MatMPISBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,bs,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
   MatCreateSBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format
   (block compressed row).  For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective

   Input Parameters:
+  comm - MPI communicator
.  bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
        blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
.  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
       This value should be the same as the local size used in creating the
       y vector for the matrix-vector product y = Ax.
.  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
       This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax.
.  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.  d_nz - number of block nonzeros per block row in diagonal portion of local
          submatrix (same for all local rows)
.  d_nnz - array containing the number of block nonzeros in the various block rows
           in the upper triangular portion of the diagonal portion of the local submatrix
           (possibly different for each block row) or NULL.
           If you plan to factor the matrix you must leave room for the diagonal entry and
           set its value even if it is zero.
.  o_nz - number of block nonzeros per block row in the off-diagonal portion of local
          submatrix (same for all local rows).
-  o_nnz - array containing the number of nonzeros in the various block rows of the
           off-diagonal portion of the local submatrix (possibly different for
           each block row) or NULL.

   Output Parameter:
.  A - the matrix

   Options Database Keys:
+   -mat_no_unroll - uses code that does not unroll the loops in the
                     block calculations (much slower)
.   -mat_block_size - size of the blocks to use
-   -mat_mpi - use the parallel matrix data structures even on one processor
               (defaults to using SeqBAIJ format on one processor)

   It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
   MatXXXXSetPreallocation() paradigm instead of this routine directly.
   [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]

   Notes:
   The number of rows and columns must be divisible by blocksize.
   This matrix type does not support complex Hermitian operation.

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
   then it must be used on all processors that share the object for that argument.

   If the *_nnz parameter is given then the *_nz parameter is ignored

   Storage Information:
   For a square global matrix we define each processor's diagonal portion
   to be its local rows and the corresponding columns (a square submatrix);
   each processor's off-diagonal portion encompasses the remainder of the
   local matrix (a rectangular submatrix).

   The user can specify preallocated storage for the diagonal part of
   the local submatrix with either d_nz or d_nnz (not both). Set
   d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
   memory allocation. Likewise, specify preallocated storage for the
   off-diagonal part of the local submatrix with o_nz or o_nnz (not both).

   Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
   the figure below we depict these three local rows and all columns (0-11).

.vb
           0 1 2 3 4 5 6 7 8 9 10 11
          --------------------------
   row 3  |. . . d d d o o o o  o  o
   row 4  |. . . d d d o o o o  o  o
   row 5  |. . . d d d o o o o  o  o
          --------------------------
.ve

   Thus, any entries in the d locations are stored in the d (diagonal)
   submatrix, and any entries in the o locations are stored in the
   o (off-diagonal) submatrix. Note that the d matrix is stored in
   MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format.

   Now d_nz should indicate the number of block nonzeros per row in the upper triangular
   plus the diagonal part of the d matrix,
   and o_nz should indicate the number of block nonzeros per row in the o matrix.
   In general, for PDE problems in which most nonzeros are near the diagonal,
   one expects d_nz >> o_nz. For large problems you MUST preallocate memory
   or you will get TERRIBLE performance; see the users' manual chapter on
   matrices.

   Level: intermediate

.seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateBAIJ()
@*/

PetscErrorCode MatCreateSBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
{
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MatCreate(comm,A);CHKERRQ(ierr);
  ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
  if (size > 1) {
    ierr = MatSetType(*A,MATMPISBAIJ);CHKERRQ(ierr);
    ierr = MatMPISBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
  } else {
    /* single-process communicator: use the sequential SBAIJ format instead */
    ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr);
    ierr = MatSeqSBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

/* Duplicates a MATMPISBAIJ matrix: copies the layout and type-specific metadata,
   duplicates the diagonal/off-diagonal parts and work vectors, and (optionally,
   per cpvalues) the numerical values */
static PetscErrorCode MatDuplicate_MPISBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
{
  Mat            mat;
  Mat_MPISBAIJ   *a,*oldmat = (Mat_MPISBAIJ*)matin->data;
  PetscErrorCode ierr;
  PetscInt       len=0,nt,bs=matin->rmap->bs,mbs=oldmat->mbs;
  PetscScalar    *array;

  PetscFunctionBegin;
  *newmat = NULL;

  ierr = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
  ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
  ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
  ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);

  mat->factortype   = matin->factortype;
  mat->preallocated = PETSC_TRUE;
  mat->assembled    = PETSC_TRUE;
  mat->insertmode   = NOT_SET_VALUES;

  /* copy the scalar block-layout metadata */
  a      = (Mat_MPISBAIJ*)mat->data;
  a->bs2 = oldmat->bs2;
  a->mbs = oldmat->mbs;
  a->nbs = oldmat->nbs;
  a->Mbs = oldmat->Mbs;
  a->Nbs = oldmat->Nbs;

  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  a->donotstash   = oldmat->donotstash;
  a->roworiented  = oldmat->roworiented;
  a->rowindices   = NULL;
  a->rowvalues    = NULL;
  a->getrowactive = PETSC_FALSE;
  a->barray       = NULL;
  a->rstartbs     = oldmat->rstartbs;
  a->rendbs       = oldmat->rendbs;
  a->cstartbs     = oldmat->cstartbs;
  a->cendbs       = oldmat->cendbs;

  /* hash table stuff */
  a->ht           = NULL;
  a->hd           = NULL;
  a->ht_size      = 0;
  a->ht_flag      = oldmat->ht_flag;
  a->ht_fact      = oldmat->ht_fact;
  a->ht_total_ct  = 0;
  a->ht_insert_ct = 0;

  ierr = PetscArraycpy(a->rangebs,oldmat->rangebs,a->size+2);CHKERRQ(ierr);
  if (oldmat->colmap) {
#if defined(PETSC_USE_CTABLE)
    ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
#else
    ierr = PetscMalloc1(a->Nbs,&a->colmap);CHKERRQ(ierr);
    ierr = PetscLogObjectMemory((PetscObject)mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
    ierr = PetscArraycpy(a->colmap,oldmat->colmap,a->Nbs);CHKERRQ(ierr);
#endif
  } else a->colmap = NULL;

  if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
    ierr = PetscMalloc1(len,&a->garray);CHKERRQ(ierr);
    ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr);
    ierr = PetscArraycpy(a->garray,oldmat->garray,len);CHKERRQ(ierr);
  } else a->garray = NULL;

  ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)matin),matin->rmap->bs,&mat->bstash);CHKERRQ(ierr);
  ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr);
  ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr);

  ierr = VecDuplicate(oldmat->slvec0,&a->slvec0);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0);CHKERRQ(ierr);
  ierr = VecDuplicate(oldmat->slvec1,&a->slvec1);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1);CHKERRQ(ierr);

  /* slvec1a/slvec1b (and slvec0b) are views aliasing the leading bs*mbs entries
     and the trailing entries of slvec1 (resp. slvec0) — they own no storage */
  ierr = VecGetLocalSize(a->slvec1,&nt);CHKERRQ(ierr);
  ierr = VecGetArray(a->slvec1,&array);CHKERRQ(ierr);
  ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,bs*mbs,array,&a->slvec1a);CHKERRQ(ierr);
  ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,nt-bs*mbs,array+bs*mbs,&a->slvec1b);CHKERRQ(ierr);
  ierr = VecRestoreArray(a->slvec1,&array);CHKERRQ(ierr);
  ierr = VecGetArray(a->slvec0,&array);CHKERRQ(ierr);
  ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,nt-bs*mbs,array+bs*mbs,&a->slvec0b);CHKERRQ(ierr);
  ierr = VecRestoreArray(a->slvec0,&array);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0b);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1a);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1b);CHKERRQ(ierr);

  /* ierr =  VecScatterCopy(oldmat->sMvctx,&a->sMvctx); - not written yet, replaced by the lazy trick: */
  ierr      = PetscObjectReference((PetscObject)oldmat->sMvctx);CHKERRQ(ierr);
  a->sMvctx = oldmat->sMvctx;
  ierr      = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->sMvctx);CHKERRQ(ierr);

  ierr    = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
  ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
  ierr    = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
  ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr);
  ierr    = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
  *newmat = mat;
  PetscFunctionReturn(0);
}

/* Used for both MPIBAIJ and MPISBAIJ matrices */
#define MatLoad_MPISBAIJ_Binary MatLoad_MPIBAIJ_Binary

/* Loads a matrix from a viewer; only binary viewers are supported */
PetscErrorCode MatLoad_MPISBAIJ(Mat mat,PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscBool      isbinary;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
  PetscCheckFalse(!isbinary,PetscObjectComm((PetscObject)viewer),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)mat)->type_name);
  ierr = MatLoad_MPISBAIJ_Binary(mat,viewer);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*XXXXX@
     MatMPISBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.

   Input Parameters:
.  mat  - the matrix
.
  fact - factor

   Not Collective on Mat, each process can have a different hash factor

   Level: advanced

   Notes:
    This can also be set by the command line option: -mat_use_hash_table fact

.seealso: MatSetOption()
@XXXXX*/

/* Compute the max absolute value in each local row of A, writing the result
   into v.  The diagonal block is handled by MatGetRowMaxAbs(a->A,...); the
   off-diagonal (upper-triangular) block contributions are accumulated into a
   global work array and forwarded to the owning ranks with raw MPI sends,
   since the symmetric format stores each off-diagonal entry on one rank only.
   The idx output is not implemented. */
PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat A,Vec v,PetscInt idx[])
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  Mat_SeqBAIJ    *b = (Mat_SeqBAIJ*)(a->B)->data;
  PetscReal      atmp;
  PetscReal      *work,*svalues,*rvalues;
  PetscErrorCode ierr;
  PetscInt       i,bs,mbs,*bi,*bj,brow,j,ncols,krow,kcol,col,row,Mbs,bcol;
  PetscMPIInt    rank,size;
  PetscInt       *rowners_bs,dest,count,source;
  PetscScalar    *va;
  MatScalar      *ba;
  MPI_Status     stat;

  PetscFunctionBegin;
  PetscCheckFalse(idx,PETSC_COMM_SELF,PETSC_ERR_SUP,"Send email to petsc-maint@mcs.anl.gov");
  ierr = MatGetRowMaxAbs(a->A,v,NULL);CHKERRQ(ierr);
  ierr = VecGetArray(v,&va);CHKERRQ(ierr);

  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRMPI(ierr);
  ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRMPI(ierr);

  bs  = A->rmap->bs;
  mbs = a->mbs;
  Mbs = a->Mbs;
  ba  = b->a;
  bi  = b->i;
  bj  = b->j;

  /* find ownerships */
  rowners_bs = A->rmap->range;

  /* each proc creates an array to be distributed */
  ierr = PetscCalloc1(bs*Mbs,&work);CHKERRQ(ierr);

  /* row_max for B: walk the CSR structure of the off-diagonal block */
  if (rank != size-1) { /* the last rank owns no upper off-diagonal block */
    for (i=0; i<mbs; i++) {
      ncols = bi[1] - bi[0]; bi++;
      brow  = bs*i;
      for (j=0; j<ncols; j++) {
        bcol = bs*(*bj);
        for (kcol=0; kcol<bs; kcol++) {
          col  = bcol + kcol;        /* local col index */
          /* NOTE(review): global offset uses rowners_bs[rank+1]; this assumes
             B's column indices count from the end of this rank's diagonal
             block — verify against the garray/colmap convention */
          col += rowners_bs[rank+1]; /* global col index */
          for (krow=0; krow<bs; krow++) {
            atmp = PetscAbsScalar(*ba); ba++;
            row  = brow + krow;      /* local row index */
            /* row-wise max for this rank, column-wise max for the owner rank */
            if (PetscRealPart(va[row]) < atmp) va[row] = atmp;
            if (work[col] < atmp) work[col] = atmp;
          }
        }
        bj++;
      }
    }

    /* send values to its owners */
    for (dest=rank+1; dest<size; dest++) {
      svalues = work + rowners_bs[dest];
      count   = rowners_bs[dest+1]-rowners_bs[dest];
      ierr    = MPI_Send(svalues,count,MPIU_REAL,dest,rank,PetscObjectComm((PetscObject)A));CHKERRMPI(ierr);
    }
  }

  /* receive values: one message from every lower rank, merged by taking the max.
     The same buffer is reused for each receive; order does not matter because
     max is commutative */
  if (rank) {
    rvalues = work;
    count   = rowners_bs[rank+1]-rowners_bs[rank];
    for (source=0; source<rank; source++) {
      ierr = MPI_Recv(rvalues,count,MPIU_REAL,MPI_ANY_SOURCE,MPI_ANY_TAG,PetscObjectComm((PetscObject)A),&stat);CHKERRMPI(ierr);
      /* process values */
      for (i=0; i<count; i++) {
        if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i];
      }
    }
  }

  ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
  ierr = PetscFree(work);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* SOR/SSOR relaxation for MPISBAIJ.  Only bs == 1 is supported.  The parallel
   symmetric sweep and the Eisenstat trick both route off-process coupling
   through the slvec0/slvec1 work vectors and the symmetric scatter sMvctx;
   the statement order around VecScatterBegin/End is load-bearing (arrays must
   be restored before the scatter completes). */
PetscErrorCode MatSOR_MPISBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
{
  Mat_MPISBAIJ      *mat = (Mat_MPISBAIJ*)matin->data;
  PetscErrorCode    ierr;
  PetscInt          mbs=mat->mbs,bs=matin->rmap->bs;
  PetscScalar       *x,*ptr,*from;
  Vec               bb1;
  const PetscScalar *b;

  PetscFunctionBegin;
  PetscCheckFalse(its <= 0 || lits <= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %" PetscInt_FMT " and local its %" PetscInt_FMT " both positive",its,lits);
  PetscCheckFalse(bs > 1,PETSC_COMM_SELF,PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");

  if (flag == SOR_APPLY_UPPER) {
    ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      /* first sweep with xx == 0 needs no off-process correction */
      ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
      its--;
    }

    ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
    while (its--) {

      /* lower triangular part: slvec0b = - B^T*xx */
      ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr);

      /* copy xx into slvec0a */
      ierr = VecGetArray(mat->slvec0,&ptr);CHKERRQ(ierr);
      ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
      ierr = PetscArraycpy(ptr,x,bs*mbs);CHKERRQ(ierr);
      ierr = VecRestoreArray(mat->slvec0,&ptr);CHKERRQ(ierr);

      ierr = VecScale(mat->slvec0,-1.0);CHKERRQ(ierr);

      /* copy bb into slvec1a */
      ierr = VecGetArray(mat->slvec1,&ptr);CHKERRQ(ierr);
      ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
      ierr = PetscArraycpy(ptr,b,bs*mbs);CHKERRQ(ierr);
      ierr = VecRestoreArray(mat->slvec1,&ptr);CHKERRQ(ierr);

      /* set slvec1b = 0 */
      ierr = VecSet(mat->slvec1b,0.0);CHKERRQ(ierr);

      ierr = VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
      ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
      ierr = VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      /* upper triangular part: bb1 = bb1 - B*x */
      ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,bb1);CHKERRQ(ierr);

      /* local diagonal sweep */
      ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
    }
    ierr = VecDestroy(&bb1);CHKERRQ(ierr);
  } else if ((flag & SOR_LOCAL_FORWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) {
    ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
  } else if ((flag & SOR_LOCAL_BACKWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) {
    ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
  } else if (flag & SOR_EISENSTAT) {
    Vec               xx1;
    PetscBool         hasop;
    const PetscScalar *diag;
    PetscScalar       *sl,scale = (omega - 2.0)/omega;
    PetscInt          i,n;

    /* lazily created work vectors, cached on the matrix */
    if (!mat->xx1) {
      ierr = VecDuplicate(bb,&mat->xx1);CHKERRQ(ierr);
      ierr = VecDuplicate(bb,&mat->bb1);CHKERRQ(ierr);
    }
    xx1 = mat->xx1;
    bb1 = mat->bb1;

    ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);

    if (!mat->diag) {
      /* this is wrong for same matrix with new nonzero values */
      ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr);
      ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
    }
    ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);

    if (hasop) {
      /* NOTE(review): result goes to bb1 but the AYPX is applied to slvec1a —
         looks inconsistent with the else-branch which writes slvec1a directly;
         confirm intended destination */
      ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
      ierr = VecAYPX(mat->slvec1a,scale,bb);CHKERRQ(ierr);
    } else {
      /*
        These two lines are replaced by code that may be a bit faster for a good compiler
        ierr = VecPointwiseMult(mat->slvec1a,mat->diag,xx);CHKERRQ(ierr);
        ierr = VecAYPX(mat->slvec1a,scale,bb);CHKERRQ(ierr);
      */
      ierr = VecGetArray(mat->slvec1a,&sl);CHKERRQ(ierr);
      ierr = VecGetArrayRead(mat->diag,&diag);CHKERRQ(ierr);
      ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
      ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
      ierr = VecGetLocalSize(xx,&n);CHKERRQ(ierr);
      if (omega == 1.0) {
        /* scale == -1 when omega == 1, so the multiply is folded away */
        for (i=0; i<n; i++) sl[i] = b[i] - diag[i]*x[i];
        ierr = PetscLogFlops(2.0*n);CHKERRQ(ierr);
      } else {
        for (i=0; i<n; i++) sl[i] = b[i] + scale*diag[i]*x[i];
        ierr = PetscLogFlops(3.0*n);CHKERRQ(ierr);
      }
      ierr = VecRestoreArray(mat->slvec1a,&sl);CHKERRQ(ierr);
      ierr = VecRestoreArrayRead(mat->diag,&diag);CHKERRQ(ierr);
      ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
      ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
    }

    /* multiply off-diagonal portion of matrix */
    ierr = VecSet(mat->slvec1b,0.0);CHKERRQ(ierr);
    ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr);
    ierr = VecGetArray(mat->slvec0,&from);CHKERRQ(ierr);
    ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
    ierr = PetscArraycpy(from,x,bs*mbs);CHKERRQ(ierr);
    ierr = VecRestoreArray(mat->slvec0,&from);CHKERRQ(ierr);
    ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
    ierr = VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,mat->slvec1a);CHKERRQ(ierr);

    /* local sweep */
    ierr = (*mat->A->ops->sor)(mat->A,mat->slvec1a,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
    ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
  } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
  PetscFunctionReturn(0);
}

/*@
     MatCreateMPISBAIJWithArrays - creates a MPI SBAIJ matrix using arrays that contain in standard
         CSR format the local rows.

   Collective

   Input Parameters:
+  comm - MPI communicator
.  bs - the block size, only a block size of 1 is supported
.  m - number of local rows (Cannot be PETSC_DECIDE)
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
       calculated if N is given) For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.  i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of block elements in that row block row of the matrix
.  j - column indices
-  a - matrix values

   Output Parameter:
.
mat - the matrix 2837 2838 Level: intermediate 2839 2840 Notes: 2841 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 2842 thus you CANNOT change the matrix entries by changing the values of a[] after you have 2843 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 2844 2845 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 2846 2847 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 2848 MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays() 2849 @*/ 2850 PetscErrorCode MatCreateMPISBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 2851 { 2852 PetscErrorCode ierr; 2853 2854 PetscFunctionBegin; 2855 PetscCheckFalse(i[0],PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 2856 PetscCheckFalse(m < 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 2857 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 2858 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 2859 ierr = MatSetType(*mat,MATMPISBAIJ);CHKERRQ(ierr); 2860 ierr = MatMPISBAIJSetPreallocationCSR(*mat,bs,i,j,a);CHKERRQ(ierr); 2861 PetscFunctionReturn(0); 2862 } 2863 2864 /*@C 2865 MatMPISBAIJSetPreallocationCSR - Creates a sparse parallel matrix in SBAIJ format using the given nonzero structure and (optional) numerical values 2866 2867 Collective 2868 2869 Input Parameters: 2870 + B - the matrix 2871 . bs - the block size 2872 . i - the indices into j for the start of each local row (starts with zero) 2873 . 
j - the column indices for each local row (starts with zero) these must be sorted for each row 2874 - v - optional values in the matrix 2875 2876 Level: advanced 2877 2878 Notes: 2879 Though this routine has Preallocation() in the name it also sets the exact nonzero locations of the matrix entries 2880 and usually the numerical values as well 2881 2882 Any entries below the diagonal are ignored 2883 2884 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ 2885 @*/ 2886 PetscErrorCode MatMPISBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 2887 { 2888 PetscErrorCode ierr; 2889 2890 PetscFunctionBegin; 2891 ierr = PetscTryMethod(B,"MatMPISBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr); 2892 PetscFunctionReturn(0); 2893 } 2894 2895 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPISBAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 2896 { 2897 PetscErrorCode ierr; 2898 PetscInt m,N,i,rstart,nnz,Ii,bs,cbs; 2899 PetscInt *indx; 2900 PetscScalar *values; 2901 2902 PetscFunctionBegin; 2903 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 2904 if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */ 2905 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)inmat->data; 2906 PetscInt *dnz,*onz,mbs,Nbs,nbs; 2907 PetscInt *bindx,rmax=a->rmax,j; 2908 PetscMPIInt rank,size; 2909 2910 ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr); 2911 mbs = m/bs; Nbs = N/cbs; 2912 if (n == PETSC_DECIDE) { 2913 ierr = PetscSplitOwnershipBlock(comm,cbs,&n,&N);CHKERRQ(ierr); 2914 } 2915 nbs = n/cbs; 2916 2917 ierr = PetscMalloc1(rmax,&bindx);CHKERRQ(ierr); 2918 ierr = MatPreallocateInitialize(comm,mbs,nbs,dnz,onz);CHKERRQ(ierr); /* inline function, output __end and __rstart are used below */ 2919 2920 ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr); 2921 ierr = MPI_Comm_rank(comm,&size);CHKERRMPI(ierr); 
2922 if (rank == size-1) { 2923 /* Check sum(nbs) = Nbs */ 2924 PetscCheckFalse(__end != Nbs,PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local block columns %" PetscInt_FMT " != global block columns %" PetscInt_FMT,__end,Nbs); 2925 } 2926 2927 rstart = __rstart; /* block rstart of *outmat; see inline function MatPreallocateInitialize */ 2928 ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE);CHKERRQ(ierr); 2929 for (i=0; i<mbs; i++) { 2930 ierr = MatGetRow_SeqSBAIJ(inmat,i*bs,&nnz,&indx,NULL);CHKERRQ(ierr); /* non-blocked nnz and indx */ 2931 nnz = nnz/bs; 2932 for (j=0; j<nnz; j++) bindx[j] = indx[j*bs]/bs; 2933 ierr = MatPreallocateSet(i+rstart,nnz,bindx,dnz,onz);CHKERRQ(ierr); 2934 ierr = MatRestoreRow_SeqSBAIJ(inmat,i*bs,&nnz,&indx,NULL);CHKERRQ(ierr); 2935 } 2936 ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_FALSE);CHKERRQ(ierr); 2937 ierr = PetscFree(bindx);CHKERRQ(ierr); 2938 2939 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 2940 ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 2941 ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr); 2942 ierr = MatSetType(*outmat,MATSBAIJ);CHKERRQ(ierr); 2943 ierr = MatSeqSBAIJSetPreallocation(*outmat,bs,0,dnz);CHKERRQ(ierr); 2944 ierr = MatMPISBAIJSetPreallocation(*outmat,bs,0,dnz,0,onz);CHKERRQ(ierr); 2945 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 2946 } 2947 2948 /* numeric phase */ 2949 ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr); 2950 ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr); 2951 2952 ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE);CHKERRQ(ierr); 2953 for (i=0; i<m; i++) { 2954 ierr = MatGetRow_SeqSBAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 2955 Ii = i + rstart; 2956 ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 2957 ierr = MatRestoreRow_SeqSBAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 2958 } 2959 ierr = 
MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_FALSE);CHKERRQ(ierr); 2960 ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2961 ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2962 PetscFunctionReturn(0); 2963 } 2964