#include <../src/mat/impls/baij/mpi/mpibaij.h> /*I "petscmat.h" I*/
#include <../src/mat/impls/sbaij/mpi/mpisbaij.h>
#include <../src/mat/impls/sbaij/seq/sbaij.h>
#include <petscblaslapack.h>

#if defined(PETSC_HAVE_ELEMENTAL)
PETSC_INTERN PetscErrorCode MatConvert_MPISBAIJ_Elemental(Mat,MatType,MatReuse,Mat*);
#endif

#undef __FUNCT__
#define __FUNCT__ "MatStoreValues_MPISBAIJ"
PetscErrorCode MatStoreValues_MPISBAIJ(Mat mat)
{
  Mat_MPISBAIJ   *aij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
  ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRetrieveValues_MPISBAIJ"
PetscErrorCode MatRetrieveValues_MPISBAIJ(Mat mat)
{
  Mat_MPISBAIJ   *aij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
  ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#define MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv,orow,ocol) \
  { \
    brow = row/bs;  \
    rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
    rmax = aimax[brow]; nrow = ailen[brow]; \
    bcol = col/bs; \
    ridx = row % bs; cidx = col % bs; \
    low  = 0; high = nrow; \
    while (high-low > 3) { \
      t = (low+high)/2; \
      if (rp[t] > bcol) high = t; \
      else              low  = t; \
    } \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap = ap + bs2*_i + bs*cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value;  \
        else                    *bap  = value; \
        goto a_noinsert; \
      } \
    } \
    if (a->nonew == 1) goto a_noinsert; \
    if (a->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \
    N = nrow++ - 1;  \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp[ii+1] = rp[ii]; \
      ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
    } \
    if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); } \
    rp[_i]                      = bcol;  \
    ap[bs2*_i + bs*cidx + ridx] = value; \
    A->nonzerostate++;\
a_noinsert:; \
    ailen[brow] = nrow; \
  }

#define MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv,orow,ocol) \
  { \
    brow = row/bs;  \
    rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
    rmax = bimax[brow]; nrow = bilen[brow]; \
    bcol = col/bs; \
    ridx = row % bs; cidx = col % bs; \
    low  = 0; high = nrow; \
    while (high-low > 3) { \
      t = (low+high)/2; \
      if (rp[t] > bcol) high = t; \
      else              low  = t; \
    } \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap = ap + bs2*_i + bs*cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value;  \
        else                    *bap  = value; \
        goto b_noinsert; \
      } \
    } \
    if (b->nonew == 1) goto b_noinsert; \
    if (b->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \
    N = nrow++ - 1;  \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp[ii+1] = rp[ii]; \
      ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
    } \
    if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); } \
    rp[_i]                      = bcol;  \
    ap[bs2*_i + bs*cidx + ridx] = value; \
    B->nonzerostate++;\
b_noinsert:; \
    bilen[brow] = nrow; \
  }
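/*
  The two insertion macros above locate a block column inside a row's sorted
  block-column list with a bounded bisection followed by a short linear scan,
  and address entry (ridx,cidx) of block _i as ap[bs2*_i + bs*cidx + ridx],
  i.e. each bs x bs block is stored column-major. A minimal standalone sketch
  of that search (hypothetical helper, not part of PETSc):
*/
#if 0
static PetscInt FindBlockColumn(const PetscInt *rp,PetscInt nrow,PetscInt bcol)
{
  PetscInt low = 0,high = nrow,t,i;
  while (high-low > 3) {           /* narrow the window by bisection */
    t = (low+high)/2;
    if (rp[t] > bcol) high = t;
    else              low  = t;
  }
  for (i=low; i<high; i++) {       /* finish with a linear scan */
    if (rp[i] == bcol) return i;   /* existing block found */
    if (rp[i] >  bcol) break;      /* past the slot: caller must insert */
  }
  return -1;
}
#endif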
/* Only add/insert a(i,j) with i<=j (blocks).
   Any a(i,j) with i>j input by the user is ignored.
*/
#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPISBAIJ"
PetscErrorCode MatSetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  MatScalar      value;
  PetscBool      roworiented = baij->roworiented;
  PetscErrorCode ierr;
  PetscInt       i,j,row,col;
  PetscInt       rstart_orig=mat->rmap->rstart;
  PetscInt       rend_orig  =mat->rmap->rend,cstart_orig=mat->cmap->rstart;
  PetscInt       cend_orig  =mat->cmap->rend,bs=mat->rmap->bs;

  /* Some Variables required in the macro */
  Mat          A     = baij->A;
  Mat_SeqSBAIJ *a    = (Mat_SeqSBAIJ*)(A)->data;
  PetscInt     *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
  MatScalar    *aa   =a->a;

  Mat         B     = baij->B;
  Mat_SeqBAIJ *b    = (Mat_SeqBAIJ*)(B)->data;
  PetscInt    *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
  MatScalar   *ba   =b->a;

  PetscInt  *rp,ii,nrow,_i,rmax,N,brow,bcol;
  PetscInt  low,high,t,ridx,cidx,bs2=a->bs2;
  MatScalar *ap,*bap;

  /* for stash */
  PetscInt  n_loc, *in_loc = NULL;
  MatScalar *v_loc = NULL;

  PetscFunctionBegin;
  if (!baij->donotstash) {
    if (n > baij->n_loc) {
      ierr = PetscFree(baij->in_loc);CHKERRQ(ierr);
      ierr = PetscFree(baij->v_loc);CHKERRQ(ierr);
      ierr = PetscMalloc1(n,&baij->in_loc);CHKERRQ(ierr);
      ierr = PetscMalloc1(n,&baij->v_loc);CHKERRQ(ierr);

      baij->n_loc = n;
    }
    in_loc = baij->in_loc;
    v_loc  = baij->v_loc;
  }

  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
    if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */
      row = im[i] - rstart_orig;                     /* local row index */
      for (j=0; j<n; j++) {
        if (im[i]/bs > in[j]/bs) {
          if (a->ignore_ltriangular) {
            continue; /* ignore lower triangular blocks */
          } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
        }
        if (in[j] >= cstart_orig && in[j] < cend_orig) { /* diag entry (A) */
          col  = in[j] - cstart_orig;                    /* local col index */
          brow = row/bs; bcol = col/bs;
          if (brow > bcol) continue; /* ignore lower triangular blocks of A */
          if (roworiented) value = v[i*n+j];
          else             value = v[i+j*m];
          MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv,im[i],in[j]);
          /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
#endif
        else { /* off-diag entry (B) */
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined(PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
            col  = col - 1;
#else
            col = baij->colmap[in[j]/bs] - 1;
#endif
            if (col < 0 && !((Mat_SeqSBAIJ*)(baij->A->data))->nonew) {
              ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
              B    = baij->B;
              b    = (Mat_SeqBAIJ*)(B)->data;
              bimax= b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
              ba   = b->a;
            } else col += in[j]%bs;
          } else col = in[j];
          if (roworiented) value = v[i*n+j];
          else             value = v[i+j*m];
          MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv,im[i],in[j]);
          /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        }
      }
    } else { /* off processor entry */
      if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!baij->donotstash) {
        mat->assembled = PETSC_FALSE;
        n_loc          = 0;
        for (j=0; j<n; j++) {
          if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */
          in_loc[n_loc] = in[j];
          if (roworiented) {
            v_loc[n_loc] = v[i*n+j];
          } else {
            v_loc[n_loc] = v[j*m+i];
          }
          n_loc++;
        }
        ierr = MatStashValuesRow_Private(&mat->stash,im[i],n_loc,in_loc,v_loc,PETSC_FALSE);CHKERRQ(ierr);
      }
    }
  }
  PetscFunctionReturn(0);
}
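/*
  A minimal usage sketch for the routine above (hypothetical standalone code,
  not part of this file): only entries whose block row is <= the block column
  are stored; lower triangular input is dropped or flagged depending on
  MAT_IGNORE_LOWER_TRIANGULAR.
*/
#if 0
static PetscErrorCode ExampleSetValues(Mat mat)
{
  PetscErrorCode ierr;
  PetscInt       i = 0,j = 1;
  PetscScalar    v = 2.0;

  PetscFunctionBegin;
  ierr = MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);
  ierr = MatSetValues(mat,1,&i,1,&j,&v,INSERT_VALUES);CHKERRQ(ierr); /* upper triangle: stored */
  ierr = MatSetValues(mat,1,&j,1,&i,&v,INSERT_VALUES);CHKERRQ(ierr); /* lower triangle: ignored */
  ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif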
#undef __FUNCT__
#define __FUNCT__ "MatSetValuesBlocked_SeqSBAIJ_Inlined"
PETSC_STATIC_INLINE PetscErrorCode MatSetValuesBlocked_SeqSBAIJ_Inlined(Mat A,PetscInt row,PetscInt col,const PetscScalar v[],InsertMode is,PetscInt orow,PetscInt ocol)
{
  Mat_SeqSBAIJ      *a = (Mat_SeqSBAIJ*)A->data;
  PetscErrorCode    ierr;
  PetscInt          *rp,low,high,t,ii,jj,nrow,i,rmax,N;
  PetscInt          *imax      =a->imax,*ai=a->i,*ailen=a->ilen;
  PetscInt          *aj        =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs;
  PetscBool         roworiented=a->roworiented;
  const PetscScalar *value     = v;
  MatScalar         *ap,*aa = a->a,*bap;

  PetscFunctionBegin;
  if (col < row) {
    if (a->ignore_ltriangular) PetscFunctionReturn(0); /* ignore lower triangular block */
    else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
  }
  rp    = aj + ai[row];
  ap    = aa + bs2*ai[row];
  rmax  = imax[row];
  nrow  = ailen[row];
  value = v;
  low   = 0;
  high  = nrow;

  while (high-low > 7) {
    t = (low+high)/2;
    if (rp[t] > col) high = t;
    else             low  = t;
  }
  for (i=low; i<high; i++) {
    if (rp[i] > col) break;
    if (rp[i] == col) {
      bap = ap + bs2*i;
      if (roworiented) {
        if (is == ADD_VALUES) {
          for (ii=0; ii<bs; ii++) {
            for (jj=ii; jj<bs2; jj+=bs) {
              bap[jj] += *value++;
            }
          }
        } else {
          for (ii=0; ii<bs; ii++) {
            for (jj=ii; jj<bs2; jj+=bs) {
              bap[jj] = *value++;
            }
          }
        }
      } else {
        if (is == ADD_VALUES) {
          for (ii=0; ii<bs; ii++) {
            for (jj=0; jj<bs; jj++) {
              *bap++ += *value++;
            }
          }
        } else {
          for (ii=0; ii<bs; ii++) {
            for (jj=0; jj<bs; jj++) {
              *bap++ = *value++;
            }
          }
        }
      }
      goto noinsert2;
    }
  }
  if (nonew == 1) goto noinsert2;
  if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new block index nonzero block (%D, %D) in the matrix", orow, ocol);
  MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
  N = nrow++ - 1; high++;
  /* shift up all the later entries in this row */
  for (ii=N; ii>=i; ii--) {
    rp[ii+1] = rp[ii];
    ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
  }
  if (N >= i) {
    ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
  }
  rp[i] = col;
  bap   = ap + bs2*i;
  if (roworiented) {
    for (ii=0; ii<bs; ii++) {
      for (jj=ii; jj<bs2; jj+=bs) {
        bap[jj] = *value++;
      }
    }
  } else {
    for (ii=0; ii<bs; ii++) {
      for (jj=0; jj<bs; jj++) {
        *bap++ = *value++;
      }
    }
  }
noinsert2:;
  ailen[row] = nrow;
  PetscFunctionReturn(0);
}
#undef __FUNCT__
#define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ_Inlined"
/*
    This routine is exactly duplicated in mpibaij.c
*/
PETSC_STATIC_INLINE PetscErrorCode MatSetValuesBlocked_SeqBAIJ_Inlined(Mat A,PetscInt row,PetscInt col,const PetscScalar v[],InsertMode is,PetscInt orow,PetscInt ocol)
{
  Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
  PetscInt          *rp,low,high,t,ii,jj,nrow,i,rmax,N;
  PetscInt          *imax=a->imax,*ai=a->i,*ailen=a->ilen;
  PetscErrorCode    ierr;
  PetscInt          *aj        =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs;
  PetscBool         roworiented=a->roworiented;
  const PetscScalar *value     = v;
  MatScalar         *ap,*aa = a->a,*bap;

  PetscFunctionBegin;
  rp    = aj + ai[row];
  ap    = aa + bs2*ai[row];
  rmax  = imax[row];
  nrow  = ailen[row];
  low   = 0;
  high  = nrow;
  value = v;
  while (high-low > 7) {
    t = (low+high)/2;
    if (rp[t] > col) high = t;
    else             low  = t;
  }
  for (i=low; i<high; i++) {
    if (rp[i] > col) break;
    if (rp[i] == col) {
      bap = ap + bs2*i;
      if (roworiented) {
        if (is == ADD_VALUES) {
          for (ii=0; ii<bs; ii++) {
            for (jj=ii; jj<bs2; jj+=bs) {
              bap[jj] += *value++;
            }
          }
        } else {
          for (ii=0; ii<bs; ii++) {
            for (jj=ii; jj<bs2; jj+=bs) {
              bap[jj] = *value++;
            }
          }
        }
      } else {
        if (is == ADD_VALUES) {
          for (ii=0; ii<bs; ii++,value+=bs) {
            for (jj=0; jj<bs; jj++) {
              bap[jj] += value[jj];
            }
            bap += bs;
          }
        } else {
          for (ii=0; ii<bs; ii++,value+=bs) {
            for (jj=0; jj<bs; jj++) {
              bap[jj] = value[jj];
            }
            bap += bs;
          }
        }
      }
      goto noinsert2;
    }
  }
  if (nonew == 1) goto noinsert2;
  if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new global block indexed nonzero block (%D, %D) in the matrix", orow, ocol);
  MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
  N = nrow++ - 1; high++;
  /* shift up all the later entries in this row */
  for (ii=N; ii>=i; ii--) {
    rp[ii+1] = rp[ii];
    ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
  }
  if (N >= i) {
    ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
  }
  rp[i] = col;
  bap   = ap + bs2*i;
  if (roworiented) {
    for (ii=0; ii<bs; ii++) {
      for (jj=ii; jj<bs2; jj+=bs) {
        bap[jj] = *value++;
      }
    }
  } else {
    for (ii=0; ii<bs; ii++) {
      for (jj=0; jj<bs; jj++) {
        *bap++ = *value++;
      }
    }
  }
noinsert2:;
  ailen[row] = nrow;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ"
/*
    This routine could be optimized by removing the need for the block copy below and passing stride information
  to the above inline routines; similarly in MatSetValuesBlocked_MPIBAIJ()
*/
PetscErrorCode MatSetValuesBlocked_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
{
  Mat_MPISBAIJ    *baij = (Mat_MPISBAIJ*)mat->data;
  const MatScalar *value;
  MatScalar       *barray     =baij->barray;
  PetscBool       roworiented = baij->roworiented,ignore_ltriangular = ((Mat_SeqSBAIJ*)baij->A->data)->ignore_ltriangular;
  PetscErrorCode  ierr;
  PetscInt        i,j,ii,jj,row,col,rstart=baij->rstartbs;
  PetscInt        rend=baij->rendbs,cstart=baij->rstartbs,stepval;
  PetscInt        cend=baij->rendbs,bs=mat->rmap->bs,bs2=baij->bs2;

  PetscFunctionBegin;
  if (!barray) {
    ierr         = PetscMalloc1(bs2,&barray);CHKERRQ(ierr);
    baij->barray = barray;
  }

  if (roworiented) {
    stepval = (n-1)*bs;
  } else {
    stepval = (m-1)*bs;
  }
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block indexed row too large %D max %D",im[i],baij->Mbs-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row = im[i] - rstart;
      for (j=0; j<n; j++) {
        if (im[i] > in[j]) {
          if (ignore_ltriangular) continue; /* ignore lower triangular blocks */
          else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
        }
        /* If NumCol = 1 then a copy is not required */
        if ((roworiented) && (n == 1)) {
          barray = (MatScalar*) v + i*bs2;
        } else if ((!roworiented) && (m == 1)) {
          barray = (MatScalar*) v + j*bs2;
        } else { /* Here a copy is required */
          if (roworiented) {
            value = v + i*(stepval+bs)*bs + j*bs;
          } else {
            value = v + j*(stepval+bs)*bs + i*bs;
          }
          for (ii=0; ii<bs; ii++,value+=stepval) {
            for (jj=0; jj<bs; jj++) {
              *barray++ = *value++;
            }
          }
          barray -= bs2;
        }

        if (in[j] >= cstart && in[j] < cend) {
          col  = in[j] - cstart;
          ierr = MatSetValuesBlocked_SeqSBAIJ_Inlined(baij->A,row,col,barray,addv,im[i],in[j]);CHKERRQ(ierr);
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block indexed column too large %D max %D",in[j],baij->Nbs-1);
#endif
        else {
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }

#if defined(PETSC_USE_DEBUG)
#if defined(PETSC_USE_CTABLE)
            { PetscInt data;
              ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
              if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
            }
#else
            if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
#endif
#endif
#if defined(PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col  = (col - 1)/bs;
#else
            col = (baij->colmap[in[j]] - 1)/bs;
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
              ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
            }
          } else col = in[j];
          ierr = MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B,row,col,barray,addv,im[i],in[j]);CHKERRQ(ierr);
        }
      }
    } else {
      if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process block indexed row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}
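/*
  Usage sketch for the blocked path above (hypothetical standalone code, not
  part of this file): the caller passes one dense bs x bs block per block index
  pair, here bs = 2 with the default row-oriented ordering, and the block row
  must not exceed the block column.
*/
#if 0
static PetscErrorCode ExampleSetValuesBlocked(Mat mat)
{
  PetscErrorCode ierr;
  PetscInt       ib = 0,jb = 1;
  PetscScalar    block[4] = {1.0,2.0,3.0,4.0}; /* the 2x2 block, row by row */

  PetscFunctionBegin;
  ierr = MatSetValuesBlocked(mat,1,&ib,1,&jb,block,INSERT_VALUES);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif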
#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPISBAIJ"
PetscErrorCode MatGetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       bs       = mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend;
  PetscInt       bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); */
    if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
    if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
      row = idxm[i] - bsrstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column %D",idxn[j]); */
        if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
        if (idxn[j] >= bscstart && idxn[j] < bscend) {
          col  = idxn[j] - bscstart;
          ierr = MatGetValues_SeqSBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!baij->colmap) {
            ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined(PETSC_USE_CTABLE)
          ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
          data--;
#else
          data = baij->colmap[idxn[j]/bs]-1;
#endif
          if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
          else {
            col  = data + idxn[j]%bs;
            ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatNorm_MPISBAIJ"
PetscErrorCode MatNorm_MPISBAIJ(Mat mat,NormType type,PetscReal *norm)
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscReal      sum[2],*lnorm2;

  PetscFunctionBegin;
  if (baij->size == 1) {
    ierr = MatNorm(baij->A,type,norm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      ierr    = PetscMalloc1(2,&lnorm2);CHKERRQ(ierr);
      ierr    = MatNorm(baij->A,type,lnorm2);CHKERRQ(ierr);
      *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2++; /* square of norm(A) */
      ierr    = MatNorm(baij->B,type,lnorm2);CHKERRQ(ierr);
      *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2--; /* square of norm(B) */
      ierr    = MPIU_Allreduce(lnorm2,sum,2,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      *norm   = PetscSqrtReal(sum[0] + 2*sum[1]);
      ierr    = PetscFree(lnorm2);CHKERRQ(ierr);
    } else if (type == NORM_INFINITY || type == NORM_1) { /* max row/column sum */
      Mat_SeqSBAIJ *amat=(Mat_SeqSBAIJ*)baij->A->data;
      Mat_SeqBAIJ  *bmat=(Mat_SeqBAIJ*)baij->B->data;
      PetscReal    *rsum,*rsum2,vabs;
      PetscInt     *jj,*garray=baij->garray,rstart=baij->rstartbs,nz;
      PetscInt     brow,bcol,col,bs=baij->A->rmap->bs,row,grow,gcol,mbs=amat->mbs;
      MatScalar    *v;

      ierr = PetscMalloc2(mat->cmap->N,&rsum,mat->cmap->N,&rsum2);CHKERRQ(ierr);
      ierr = PetscMemzero(rsum,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
      /* Amat */
      v = amat->a; jj = amat->j;
      for (brow=0; brow<mbs; brow++) {
        grow = bs*(rstart + brow);
        nz   = amat->i[brow+1] - amat->i[brow];
        for (bcol=0; bcol<nz; bcol++) {
          gcol = bs*(rstart + *jj); jj++;
          for (col=0; col<bs; col++) {
            for (row=0; row<bs; row++) {
              vabs            = PetscAbsScalar(*v); v++;
              rsum[gcol+col] += vabs;
              /* non-diagonal block */
              if (bcol > 0 && vabs > 0.0) rsum[grow+row] += vabs;
            }
          }
        }
      }
      /* Bmat */
      v = bmat->a; jj = bmat->j;
      for (brow=0; brow<mbs; brow++) {
        grow = bs*(rstart + brow);
        nz   = bmat->i[brow+1] - bmat->i[brow];
        for (bcol=0; bcol<nz; bcol++) {
          gcol = bs*garray[*jj]; jj++;
          for (col=0; col<bs; col++) {
            for (row=0; row<bs; row++) {
              vabs            = PetscAbsScalar(*v); v++;
              rsum[gcol+col] += vabs;
              rsum[grow+row] += vabs;
            }
          }
        }
      }
      ierr  = MPIU_Allreduce(rsum,rsum2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      *norm = 0.0;
      for (col=0; col<mat->cmap->N; col++) {
        if (rsum2[col] > *norm) *norm = rsum2[col];
      }
      ierr = PetscFree2(rsum,rsum2);CHKERRQ(ierr);
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for this norm yet");
  }
  PetscFunctionReturn(0);
}
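/*
  Note on the Frobenius branch above: each process stores its diagonal block in
  A and a single copy of each off-diagonal coupling in B, so the global norm is
  assembled as

     norm = sqrt( sum_p ||A_p||_F^2 + 2 * sum_p ||B_p||_F^2 )

  where the factor 2 accounts for the transposed copies of B that are never
  stored.
*/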
#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPISBAIJ"
PetscErrorCode MatAssemblyBegin_MPISBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;

  PetscFunctionBegin;
  if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);

  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
  ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(mat,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
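/*
  Hedged usage sketch (hypothetical standalone code, not part of this file):
  entries destined for rows owned by another process are stashed locally by
  MatSetValues() and are only communicated by the assembly pair handled above
  and in MatAssemblyEnd_MPISBAIJ() below.
*/
#if 0
static PetscErrorCode ExampleAssemble(Mat mat,PetscInt row,PetscInt col,PetscScalar v)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* row may be owned by any process; off-process entries go into mat->stash */
  ierr = MatSetValues(mat,1,&row,1,&col,&v,ADD_VALUES);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); /* starts the stash scatter */
  ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);   /* receives and inserts stashed values */
  PetscFunctionReturn(0);
}
#endif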
#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPISBAIJ"
PetscErrorCode MatAssemblyEnd_MPISBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPISBAIJ   *baij=(Mat_MPISBAIJ*)mat->data;
  Mat_SeqSBAIJ   *a   =(Mat_SeqSBAIJ*)baij->A->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
  PetscInt       *row,*col;
  PetscBool      other_disassembled;
  PetscMPIInt    n;
  PetscBool      r1,r2,r3;
  MatScalar      *val;

  /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
  PetscFunctionBegin;
  if (!baij->donotstash && !mat->nooffprocentries) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);
        i    = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
    /* Now process the block-stash. Since the values are stashed column-oriented,
       set the roworiented flag to column-oriented, and after MatSetValues()
       restore the original flags */
    r1 = baij->roworiented;
    r2 = a->roworiented;
    r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented;

    baij->roworiented = PETSC_FALSE;
    a->roworiented    = PETSC_FALSE;

    ((Mat_SeqBAIJ*)baij->B->data)->roworiented = PETSC_FALSE; /* b->roworiented */
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        ierr = MatSetValuesBlocked_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,mat->insertmode);CHKERRQ(ierr);
        i    = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);

    baij->roworiented = r1;
    a->roworiented    = r2;

    ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworiented */
  }

  ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) {
    ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
    }
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPISBAIJ(mat);CHKERRQ(ierr); /* setup Mvctx and sMvctx */
  }
  ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);

  ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);

  baij->rowvalues = 0;

  /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
  if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
    PetscObjectState state = baij->A->nonzerostate + baij->B->nonzerostate;
    ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

extern PetscErrorCode MatView_SeqSBAIJ(Mat,PetscViewer);
extern PetscErrorCode MatSetValues_MPIBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
#include <petscdraw.h>
#undef __FUNCT__
#define __FUNCT__ "MatView_MPISBAIJ_ASCIIorDraworSocket"
static PetscErrorCode MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPISBAIJ      *baij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode    ierr;
  PetscInt          bs   = mat->rmap->bs;
  PetscMPIInt       rank = baij->rank;
  PetscBool         iascii,isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo info;
      ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,mat->rmap->bs,(PetscInt)info.memory);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
      ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(0);
    }
  }

  if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
    if (isnull) PetscFunctionReturn(0);
  }

  {
    /* assemble the entire matrix onto first processor. */
    Mat          A;
    Mat_SeqSBAIJ *Aloc;
    Mat_SeqBAIJ  *Bloc;
    PetscInt     M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
    MatScalar    *a;
    const char   *matname;

    /* Should this be the same type as mat? */
    ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
    } else {
      ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
    }
    ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
    ierr = MatMPISBAIJSetPreallocation(A,mat->rmap->bs,0,NULL,0,NULL);CHKERRQ(ierr);
    ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);

    /* copy over the A part */
    Aloc = (Mat_SeqSBAIJ*)baij->A->data;
    ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
    ierr = PetscMalloc1(bs,&rvals);CHKERRQ(ierr);

    for (i=0; i<mbs; i++) {
      rvals[0] = bs*(baij->rstartbs + i);
      for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
      for (j=ai[i]; j<ai[i+1]; j++) {
        col = (baij->cstartbs+aj[j])*bs;
        for (k=0; k<bs; k++) {
          ierr = MatSetValues_MPISBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++;
          a += bs;
        }
      }
    }
    /* copy over the B part */
    Bloc = (Mat_SeqBAIJ*)baij->B->data;
    ai   = Bloc->i; aj = Bloc->j; a = Bloc->a;
    for (i=0; i<mbs; i++) {
      rvals[0] = bs*(baij->rstartbs + i);
      for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
      for (j=ai[i]; j<ai[i+1]; j++) {
        col = baij->garray[aj[j]]*bs;
        for (k=0; k<bs; k++) {
          ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++;
          a += bs;
        }
      }
    }
    ierr = PetscFree(rvals);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
    ierr = PetscObjectGetName((PetscObject)mat,&matname);CHKERRQ(ierr);
    if (!rank) {
      ierr = PetscObjectSetName((PetscObject)((Mat_MPISBAIJ*)(A->data))->A,matname);CHKERRQ(ierr);
      ierr = MatView_SeqSBAIJ(((Mat_MPISBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
    ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
    ierr = MatDestroy(&A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPISBAIJ"
PetscErrorCode MatView_MPISBAIJ(Mat mat,PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscBool      iascii,isdraw,issocket,isbinary;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
  if (iascii || isdraw || issocket || isbinary) {
    ierr = MatView_MPISBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPISBAIJ"
PetscErrorCode MatDestroy_MPISBAIJ(Mat mat)
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
  ierr = MatDestroy(&baij->A);CHKERRQ(ierr);
  ierr = MatDestroy(&baij->B);CHKERRQ(ierr);
#if defined(PETSC_USE_CTABLE)
  ierr = PetscTableDestroy(&baij->colmap);CHKERRQ(ierr);
#else
  ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
#endif
  ierr = PetscFree(baij->garray);CHKERRQ(ierr);
  ierr = VecDestroy(&baij->lvec);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&baij->Mvctx);CHKERRQ(ierr);
  ierr = VecDestroy(&baij->slvec0);CHKERRQ(ierr);
  ierr = VecDestroy(&baij->slvec0b);CHKERRQ(ierr);
  ierr = VecDestroy(&baij->slvec1);CHKERRQ(ierr);
  ierr = VecDestroy(&baij->slvec1a);CHKERRQ(ierr);
  ierr = VecDestroy(&baij->slvec1b);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&baij->sMvctx);CHKERRQ(ierr);
  ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
  ierr = PetscFree(baij->barray);CHKERRQ(ierr);
  ierr = PetscFree(baij->hd);CHKERRQ(ierr);
  ierr = VecDestroy(&baij->diag);CHKERRQ(ierr);
  ierr = VecDestroy(&baij->bb1);CHKERRQ(ierr);
  ierr = VecDestroy(&baij->xx1);CHKERRQ(ierr);
#if defined(PETSC_USE_REAL_MAT_SINGLE)
  ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr);
#endif
  ierr = PetscFree(baij->in_loc);CHKERRQ(ierr);
  ierr = PetscFree(baij->v_loc);CHKERRQ(ierr);
  ierr = PetscFree(baij->rangebs);CHKERRQ(ierr);
  ierr = PetscFree(mat->data);CHKERRQ(ierr);

  ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPISBAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpisbaij_mpisbstrm_C",NULL);CHKERRQ(ierr);
#if defined(PETSC_HAVE_ELEMENTAL)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpisbaij_elemental_C",NULL);CHKERRQ(ierr);
#endif
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMult_MPISBAIJ_Hermitian"
PetscErrorCode MatMult_MPISBAIJ_Hermitian(Mat A,Vec xx,Vec yy)
{
  Mat_MPISBAIJ      *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode    ierr;
  PetscInt          nt,mbs=a->mbs,bs=A->rmap->bs;
  PetscScalar       *from;
  const PetscScalar *x;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");

  /* diagonal part */
  ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr);
  ierr = VecSet(a->slvec1b,0.0);CHKERRQ(ierr);

  /* subdiagonal part */
  ierr = (*a->B->ops->multhermitiantranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);

  /* copy x into the vec slvec0 */
  ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
  ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);

  ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
  ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
  ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);

  ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  /* superdiagonal part */
  ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#undef __FUNCT__
#define __FUNCT__ "MatMult_MPISBAIJ"
PetscErrorCode MatMult_MPISBAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPISBAIJ      *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode    ierr;
  PetscInt          nt,mbs=a->mbs,bs=A->rmap->bs;
  PetscScalar       *from;
  const PetscScalar *x;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");

  /* diagonal part */
  ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr);
  ierr = VecSet(a->slvec1b,0.0);CHKERRQ(ierr);

  /* subdiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);

  /* copy x into the vec slvec0 */
  ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
  ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);

  ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
  ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
  ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);

  ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  /* superdiagonal part */
  ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
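/*
  The two multiply routines above compute y = M x for the symmetric layout
  M = [A B; B^T ...] without ever forming B^T:

     slvec0 = [x ; B^T x]                          (purely local)
     slvec1 = [A x ; 0] + scatter-add of slvec0    (one communication)
     y      = slvec1a + B * slvec1b

  so a single forward scatter with ADD_VALUES carries both the ghost values of
  x needed by B and the B^T x contributions destined for other processes.
*/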
#undef __FUNCT__
#define __FUNCT__ "MatMult_MPISBAIJ_2comm"
PetscErrorCode MatMult_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");

  ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
  if (nt != A->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and yy");

  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  /* do diagonal part */
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  /* do superdiagonal part */
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  /* do subdiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPISBAIJ"
PetscErrorCode MatMultAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPISBAIJ      *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode    ierr;
  PetscInt          mbs=a->mbs,bs=A->rmap->bs;
  PetscScalar       *from,zero=0.0;
  const PetscScalar *x;

  PetscFunctionBegin;
  /*
  PetscSynchronizedPrintf(PetscObjectComm((PetscObject)A)," MatMultAdd is called ...\n");
  PetscSynchronizedFlush(PetscObjectComm((PetscObject)A),PETSC_STDOUT);
  */
  /* diagonal part */
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,a->slvec1a);CHKERRQ(ierr);
  ierr = VecSet(a->slvec1b,zero);CHKERRQ(ierr);

  /* subdiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);

  /* copy x into the vec slvec0 */
  ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
  ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
  ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
  ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);

  ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

  /* superdiagonal part */
  ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPISBAIJ_2comm"
PetscErrorCode MatMultAdd_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  /* do diagonal part */
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* do superdiagonal part */
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);

  /* do subdiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonal_MPISBAIJ"
PetscErrorCode MatGetDiagonal_MPISBAIJ(Mat A,Vec v)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* if (a->rmap->N != a->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatScale_MPISBAIJ"
PetscErrorCode MatScale_MPISBAIJ(Mat A,PetscScalar aa)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatScale(a->A,aa);CHKERRQ(ierr);
  ierr = MatScale(a->B,aa);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetRow_MPISBAIJ"
PetscErrorCode MatGetRow_MPISBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
  PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
  PetscErrorCode ierr;
  PetscInt       bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
  PetscInt       nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend;
  PetscInt       *cmap,*idx_p,cstart = mat->rstartbs;

  PetscFunctionBegin;
  if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqSBAIJ *Aa = (Mat_SeqSBAIJ*)mat->A->data;
    Mat_SeqBAIJ  *Ba = (Mat_SeqBAIJ*)mat->B->data;
    PetscInt     max = 1,mbs = mat->mbs,tmp;
    for (i=0; i<mbs; i++) {
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; /* row length */
      if (max < tmp) max = tmp;
    }
    ierr = PetscMalloc2(max*bs2,&mat->rowvalues,max*bs2,&mat->rowindices);CHKERRQ(ierr);
  }

  if (row < brstart || row >= brend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows");
  lrow = row - brstart; /* local row index */

  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  nztot = nzA + nzB;

  cmap = mat->garray;
  if (v || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      PetscInt imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        for (i=0; i<nzB; i++) {
          if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
        for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          for (i=0; i<imark; i++) {
            idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
          }
        } else {
          for (i=0; i<nzB; i++) {
            if (cmap[cworkB[i]/bs] < cstart) idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
            else break;
          }
          imark = i;
        }
        for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
        for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
      }
    } else {
      if (idx) *idx = 0;
      if (v)   *v   = 0;
    }
  }
  *nz  = nztot;
  ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRestoreRow_MPISBAIJ"
PetscErrorCode MatRestoreRow_MPISBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;

  PetscFunctionBegin;
  if (!baij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
  baij->getrowactive = PETSC_FALSE;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetRowUpperTriangular_MPISBAIJ"
PetscErrorCode MatGetRowUpperTriangular_MPISBAIJ(Mat A)
{
  Mat_MPISBAIJ *a  = (Mat_MPISBAIJ*)A->data;
  Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data;

  PetscFunctionBegin;
  aA->getrow_utriangular = PETSC_TRUE;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRestoreRowUpperTriangular_MPISBAIJ"
PetscErrorCode MatRestoreRowUpperTriangular_MPISBAIJ(Mat A)
{
  Mat_MPISBAIJ *a  = (Mat_MPISBAIJ*)A->data;
  Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data;

  PetscFunctionBegin;
  aA->getrow_utriangular = PETSC_FALSE;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRealPart_MPISBAIJ"
PetscErrorCode MatRealPart_MPISBAIJ(Mat A)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatRealPart(a->A);CHKERRQ(ierr);
  ierr = MatRealPart(a->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatImaginaryPart_MPISBAIJ"
PetscErrorCode MatImaginaryPart_MPISBAIJ(Mat A)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
  ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Check if isrow is a subset of iscol_local, called by MatGetSubMatrix_MPISBAIJ()
   Input: isrow       - distributed(parallel),
          iscol_local - locally owned (seq)
*/
#undef __FUNCT__
#define __FUNCT__ "ISEqual_private"
PetscErrorCode ISEqual_private(IS isrow,IS iscol_local,PetscBool *flg)
{
  PetscErrorCode ierr;
  PetscInt       sz1,sz2,*a1,*a2,i,j,k,nmatch;
  const PetscInt *ptr1,*ptr2;

  PetscFunctionBegin;
  ierr = ISGetLocalSize(isrow,&sz1);CHKERRQ(ierr);
  ierr = ISGetLocalSize(iscol_local,&sz2);CHKERRQ(ierr);
  if (sz1 > sz2) {
    *flg = PETSC_FALSE;
    PetscFunctionReturn(0);
  }

  ierr = ISGetIndices(isrow,&ptr1);CHKERRQ(ierr);
  ierr = ISGetIndices(iscol_local,&ptr2);CHKERRQ(ierr);

  ierr = PetscMalloc1(sz1,&a1);CHKERRQ(ierr);
  ierr = PetscMalloc1(sz2,&a2);CHKERRQ(ierr);
  ierr = PetscMemcpy(a1,ptr1,sz1*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMemcpy(a2,ptr2,sz2*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscSortInt(sz1,a1);CHKERRQ(ierr);
  ierr = PetscSortInt(sz2,a2);CHKERRQ(ierr);

  nmatch = 0;
  k      = 0;
  for (i=0; i<sz1; i++) {
    for (j=k; j<sz2; j++) {
      if (a1[i] == a2[j]) {
        k = j; nmatch++;
        break;
      }
    }
  }
  ierr = ISRestoreIndices(isrow,&ptr1);CHKERRQ(ierr);
  ierr = ISRestoreIndices(iscol_local,&ptr2);CHKERRQ(ierr);
  ierr = PetscFree(a1);CHKERRQ(ierr);
  ierr = PetscFree(a2);CHKERRQ(ierr);
  if (nmatch < sz1) {
    *flg = PETSC_FALSE;
  } else {
    *flg = PETSC_TRUE;
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetSubMatrix_MPISBAIJ"
PetscErrorCode MatGetSubMatrix_MPISBAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
{
  PetscErrorCode ierr;
  IS             iscol_local;
  PetscInt       csize;
  PetscBool      isequal;

  PetscFunctionBegin;
  ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
  if (call == MAT_REUSE_MATRIX) {
    ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
    if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
  } else {
    ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
    ierr = ISEqual_private(isrow,iscol_local,&isequal);CHKERRQ(ierr);
    if (!isequal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"For symmetric format, iscol must equal isrow");
  }

  /* now call MatGetSubMatrix_MPIBAIJ() */
  ierr = MatGetSubMatrix_MPIBAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
  if (call == MAT_INITIAL_MATRIX) {
    ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
    ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
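/*
  Usage sketch for the routine above (hypothetical standalone code, not part of
  this file): because the symmetric format requires iscol to equal isrow,
  submatrices are extracted with the same index set on both sides.
*/
#if 0
static PetscErrorCode ExampleGetSubMatrix(Mat mat,IS is,Mat *sub)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatGetSubMatrix(mat,is,is,MAT_INITIAL_MATRIX,sub);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif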
__FUNCT__ "MatZeroEntries_MPISBAIJ" 1397 PetscErrorCode MatZeroEntries_MPISBAIJ(Mat A) 1398 { 1399 Mat_MPISBAIJ *l = (Mat_MPISBAIJ*)A->data; 1400 PetscErrorCode ierr; 1401 1402 PetscFunctionBegin; 1403 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 1404 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 1405 PetscFunctionReturn(0); 1406 } 1407 1408 #undef __FUNCT__ 1409 #define __FUNCT__ "MatGetInfo_MPISBAIJ" 1410 PetscErrorCode MatGetInfo_MPISBAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1411 { 1412 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)matin->data; 1413 Mat A = a->A,B = a->B; 1414 PetscErrorCode ierr; 1415 PetscReal isend[5],irecv[5]; 1416 1417 PetscFunctionBegin; 1418 info->block_size = (PetscReal)matin->rmap->bs; 1419 1420 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1421 1422 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1423 isend[3] = info->memory; isend[4] = info->mallocs; 1424 1425 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1426 1427 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1428 isend[3] += info->memory; isend[4] += info->mallocs; 1429 if (flag == MAT_LOCAL) { 1430 info->nz_used = isend[0]; 1431 info->nz_allocated = isend[1]; 1432 info->nz_unneeded = isend[2]; 1433 info->memory = isend[3]; 1434 info->mallocs = isend[4]; 1435 } else if (flag == MAT_GLOBAL_MAX) { 1436 ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1437 1438 info->nz_used = irecv[0]; 1439 info->nz_allocated = irecv[1]; 1440 info->nz_unneeded = irecv[2]; 1441 info->memory = irecv[3]; 1442 info->mallocs = irecv[4]; 1443 } else if (flag == MAT_GLOBAL_SUM) { 1444 ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1445 1446 info->nz_used = irecv[0]; 1447 info->nz_allocated = irecv[1]; 1448 info->nz_unneeded = irecv[2]; 1449 info->memory = irecv[3]; 1450 info->mallocs = irecv[4]; 1451 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag); 1452 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1453 info->fill_ratio_needed = 0; 1454 info->factor_mallocs = 0; 1455 PetscFunctionReturn(0); 1456 } 1457 1458 #undef __FUNCT__ 1459 #define __FUNCT__ "MatSetOption_MPISBAIJ" 1460 PetscErrorCode MatSetOption_MPISBAIJ(Mat A,MatOption op,PetscBool flg) 1461 { 1462 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1463 Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data; 1464 PetscErrorCode ierr; 1465 1466 PetscFunctionBegin; 1467 switch (op) { 1468 case MAT_NEW_NONZERO_LOCATIONS: 1469 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1470 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1471 case MAT_KEEP_NONZERO_PATTERN: 1472 case MAT_NEW_NONZERO_LOCATION_ERR: 1473 MatCheckPreallocated(A,1); 1474 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1475 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1476 break; 1477 case MAT_ROW_ORIENTED: 1478 MatCheckPreallocated(A,1); 1479 a->roworiented = flg; 1480 1481 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1482 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1483 break; 1484 case MAT_NEW_DIAGONALS: 1485 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1486 break; 1487 case MAT_IGNORE_OFF_PROC_ENTRIES: 1488 a->donotstash = flg; 1489 break; 1490 case MAT_USE_HASH_TABLE: 1491 a->ht_flag = flg; 1492 break; 1493 case MAT_HERMITIAN: 1494 MatCheckPreallocated(A,1); 1495 if (!A->assembled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call 
MatAssemblyEnd() first"); 1496 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1497 1498 A->ops->mult = MatMult_MPISBAIJ_Hermitian; 1499 break; 1500 case MAT_SPD: 1501 A->spd_set = PETSC_TRUE; 1502 A->spd = flg; 1503 if (flg) { 1504 A->symmetric = PETSC_TRUE; 1505 A->structurally_symmetric = PETSC_TRUE; 1506 A->symmetric_set = PETSC_TRUE; 1507 A->structurally_symmetric_set = PETSC_TRUE; 1508 } 1509 break; 1510 case MAT_SYMMETRIC: 1511 MatCheckPreallocated(A,1); 1512 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1513 break; 1514 case MAT_STRUCTURALLY_SYMMETRIC: 1515 MatCheckPreallocated(A,1); 1516 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1517 break; 1518 case MAT_SYMMETRY_ETERNAL: 1519 if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Matrix must be symmetric"); 1520 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1521 break; 1522 case MAT_IGNORE_LOWER_TRIANGULAR: 1523 aA->ignore_ltriangular = flg; 1524 break; 1525 case MAT_ERROR_LOWER_TRIANGULAR: 1526 aA->ignore_ltriangular = flg; 1527 break; 1528 case MAT_GETROW_UPPERTRIANGULAR: 1529 aA->getrow_utriangular = flg; 1530 break; 1531 default: 1532 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1533 } 1534 PetscFunctionReturn(0); 1535 } 1536 1537 #undef __FUNCT__ 1538 #define __FUNCT__ "MatTranspose_MPISBAIJ" 1539 PetscErrorCode MatTranspose_MPISBAIJ(Mat A,MatReuse reuse,Mat *B) 1540 { 1541 PetscErrorCode ierr; 1542 1543 PetscFunctionBegin; 1544 if (MAT_INITIAL_MATRIX || *B != A) { 1545 ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr); 1546 } 1547 PetscFunctionReturn(0); 1548 } 1549 1550 #undef __FUNCT__ 1551 #define __FUNCT__ "MatDiagonalScale_MPISBAIJ" 1552 PetscErrorCode MatDiagonalScale_MPISBAIJ(Mat mat,Vec ll,Vec rr) 1553 { 1554 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 1555 Mat a = baij->A, b=baij->B; 1556 PetscErrorCode ierr; 1557 PetscInt nv,m,n; 1558 PetscBool flg; 1559 1560 PetscFunctionBegin; 1561 if (ll != rr) { 1562 ierr = VecEqual(ll,rr,&flg);CHKERRQ(ierr); 1563 if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be same\n"); 1564 } 1565 if (!ll) PetscFunctionReturn(0); 1566 1567 ierr = MatGetLocalSize(mat,&m,&n);CHKERRQ(ierr); 1568 if (m != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"For symmetric format, local size %d %d must be same",m,n); 1569 1570 ierr = VecGetLocalSize(rr,&nv);CHKERRQ(ierr); 1571 if (nv!=n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left and right vector non-conforming local size"); 1572 1573 ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1574 1575 /* left diagonalscale the off-diagonal part */ 1576 ierr = (*b->ops->diagonalscale)(b,ll,NULL);CHKERRQ(ierr); 1577 1578 /* scale the diagonal part */ 1579 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1580 1581 /* right diagonalscale the off-diagonal part */ 1582 ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1583 ierr = (*b->ops->diagonalscale)(b,NULL,baij->lvec);CHKERRQ(ierr); 1584 PetscFunctionReturn(0); 1585 } 1586 1587 #undef __FUNCT__ 1588 #define __FUNCT__ "MatSetUnfactored_MPISBAIJ" 1589 PetscErrorCode MatSetUnfactored_MPISBAIJ(Mat A) 1590 { 1591 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1592 PetscErrorCode ierr; 1593 1594 PetscFunctionBegin; 1595 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1596 PetscFunctionReturn(0); 1597 } 1598 1599 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat,MatDuplicateOption,Mat*); 1600 1601 #undef 

#undef __FUNCT__
#define __FUNCT__ "MatSetUnfactored_MPISBAIJ"
PetscErrorCode MatSetUnfactored_MPISBAIJ(Mat A)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDuplicate_MPISBAIJ(Mat,MatDuplicateOption,Mat*);

#undef __FUNCT__
#define __FUNCT__ "MatEqual_MPISBAIJ"
PetscErrorCode MatEqual_MPISBAIJ(Mat A,Mat B,PetscBool *flag)
{
  Mat_MPISBAIJ   *matB = (Mat_MPISBAIJ*)B->data,*matA = (Mat_MPISBAIJ*)A->data;
  Mat            a,b,c,d;
  PetscBool      flg;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  a = matA->A; b = matA->B;
  c = matB->A; d = matB->B;

  ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
  if (flg) {
    ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
  }
  ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatCopy_MPISBAIJ"
PetscErrorCode MatCopy_MPISBAIJ(Mat A,Mat B,MatStructure str)
{
  PetscErrorCode ierr;
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  Mat_MPISBAIJ   *b = (Mat_MPISBAIJ*)B->data;

  PetscFunctionBegin;
  /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
  if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
    ierr = MatGetRowUpperTriangular(A);CHKERRQ(ierr);
    ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
    ierr = MatRestoreRowUpperTriangular(A);CHKERRQ(ierr);
  } else {
    ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
    ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetUp_MPISBAIJ"
PetscErrorCode MatSetUp_MPISBAIJ(Mat A)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatMPISBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAXPY_MPISBAIJ"
PetscErrorCode MatAXPY_MPISBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
{
  PetscErrorCode ierr;
  Mat_MPISBAIJ   *xx = (Mat_MPISBAIJ*)X->data,*yy = (Mat_MPISBAIJ*)Y->data;
  PetscBLASInt   bnz,one = 1;
  Mat_SeqSBAIJ   *xa,*ya;
  Mat_SeqBAIJ    *xb,*yb;

  PetscFunctionBegin;
  if (str == SAME_NONZERO_PATTERN) {
    PetscScalar alpha = a;
    xa   = (Mat_SeqSBAIJ*)xx->A->data;
    ya   = (Mat_SeqSBAIJ*)yy->A->data;
    ierr = PetscBLASIntCast(xa->nz,&bnz);CHKERRQ(ierr);
    PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,xa->a,&one,ya->a,&one));
    xb   = (Mat_SeqBAIJ*)xx->B->data;
    yb   = (Mat_SeqBAIJ*)yy->B->data;
    ierr = PetscBLASIntCast(xb->nz,&bnz);CHKERRQ(ierr);
    PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,xb->a,&one,yb->a,&one));
    ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
  } else if (str == SUBSET_NONZERO_PATTERN) { /* the nonzeros of X are a subset of Y's */
    ierr = MatSetOption(X,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);
    ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
    ierr = MatSetOption(X,MAT_GETROW_UPPERTRIANGULAR,PETSC_FALSE);CHKERRQ(ierr);
  } else {
    Mat      B;
    PetscInt *nnz_d,*nnz_o,bs = Y->rmap->bs;
    if (bs != X->rmap->bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrices must have same block size");
    ierr = MatGetRowUpperTriangular(X);CHKERRQ(ierr);
    ierr = MatGetRowUpperTriangular(Y);CHKERRQ(ierr);
    ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr);
    ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr);
    ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
    ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr); 1691 ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr); 1692 ierr = MatSetType(B,MATMPISBAIJ);CHKERRQ(ierr); 1693 ierr = MatAXPYGetPreallocation_SeqSBAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr); 1694 ierr = MatAXPYGetPreallocation_MPIBAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr); 1695 ierr = MatMPISBAIJSetPreallocation(B,bs,0,nnz_d,0,nnz_o);CHKERRQ(ierr); 1696 ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 1697 ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr); 1698 ierr = PetscFree(nnz_d);CHKERRQ(ierr); 1699 ierr = PetscFree(nnz_o);CHKERRQ(ierr); 1700 ierr = MatRestoreRowUpperTriangular(X);CHKERRQ(ierr); 1701 ierr = MatRestoreRowUpperTriangular(Y);CHKERRQ(ierr); 1702 } 1703 PetscFunctionReturn(0); 1704 } 1705 1706 #undef __FUNCT__ 1707 #define __FUNCT__ "MatGetSubMatrices_MPISBAIJ" 1708 PetscErrorCode MatGetSubMatrices_MPISBAIJ(Mat A,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat *B[]) 1709 { 1710 PetscErrorCode ierr; 1711 PetscInt i; 1712 PetscBool flg,sorted; 1713 1714 PetscFunctionBegin; 1715 for (i = 0; i < n; i++) { 1716 ierr = ISSorted(irow[i],&sorted);CHKERRQ(ierr); 1717 if (!sorted) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row index set %d not sorted",i); 1718 ierr = ISSorted(icol[i],&sorted);CHKERRQ(ierr); 1719 if (!sorted) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Column index set %d not sorted",i); 1720 } 1721 ierr = MatGetSubMatrices_MPIBAIJ(A,n,irow,icol,scall,B);CHKERRQ(ierr); 1722 for (i=0; i<n; i++) { 1723 ierr = ISEqual(irow[i],icol[i],&flg);CHKERRQ(ierr); 1724 if (!flg) { /* *B[i] is non-symmetric, set flag */ 1725 ierr = MatSetOption(*B[i],MAT_SYMMETRIC,PETSC_FALSE);CHKERRQ(ierr); 1726 } 1727 } 1728 PetscFunctionReturn(0); 1729 } 1730 1731 #undef __FUNCT__ 1732 #define __FUNCT__ "MatShift_MPISBAIJ" 1733 PetscErrorCode MatShift_MPISBAIJ(Mat Y,PetscScalar a) 1734 { 1735 PetscErrorCode ierr; 1736 Mat_MPISBAIJ *maij = (Mat_MPISBAIJ*)Y->data; 1737 Mat_SeqSBAIJ *aij = (Mat_SeqSBAIJ*)maij->A->data; 1738 1739 PetscFunctionBegin; 1740 if (!Y->preallocated) { 1741 ierr = MatMPISBAIJSetPreallocation(Y,Y->rmap->bs,1,NULL,0,NULL);CHKERRQ(ierr); 1742 } else if (!aij->nz) { 1743 PetscInt nonew = aij->nonew; 1744 ierr = MatSeqSBAIJSetPreallocation(maij->A,Y->rmap->bs,1,NULL);CHKERRQ(ierr); 1745 aij->nonew = nonew; 1746 } 1747 ierr = MatShift_Basic(Y,a);CHKERRQ(ierr); 1748 PetscFunctionReturn(0); 1749 } 1750 1751 #undef __FUNCT__ 1752 #define __FUNCT__ "MatMissingDiagonal_MPISBAIJ" 1753 PetscErrorCode MatMissingDiagonal_MPISBAIJ(Mat A,PetscBool *missing,PetscInt *d) 1754 { 1755 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1756 PetscErrorCode ierr; 1757 1758 PetscFunctionBegin; 1759 if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices"); 1760 ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr); 1761 if (d) { 1762 PetscInt rstart; 1763 ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr); 1764 *d += rstart/A->rmap->bs; 1765 1766 } 1767 PetscFunctionReturn(0); 1768 } 1769 1770 1771 /* -------------------------------------------------------------------*/ 1772 static struct _MatOps MatOps_Values = {MatSetValues_MPISBAIJ, 1773 MatGetRow_MPISBAIJ, 1774 MatRestoreRow_MPISBAIJ, 1775 MatMult_MPISBAIJ, 1776 /* 4*/ MatMultAdd_MPISBAIJ, 1777 MatMult_MPISBAIJ, /* transpose versions are same as non-transpose */ 1778 MatMultAdd_MPISBAIJ, 1779 0, 1780 0, 1781 0, 1782 /* 10*/ 0, 1783 0, 1784 0, 1785 MatSOR_MPISBAIJ, 
                                       MatTranspose_MPISBAIJ,
                               /* 15*/ MatGetInfo_MPISBAIJ,
                                       MatEqual_MPISBAIJ,
                                       MatGetDiagonal_MPISBAIJ,
                                       MatDiagonalScale_MPISBAIJ,
                                       MatNorm_MPISBAIJ,
                               /* 20*/ MatAssemblyBegin_MPISBAIJ,
                                       MatAssemblyEnd_MPISBAIJ,
                                       MatSetOption_MPISBAIJ,
                                       MatZeroEntries_MPISBAIJ,
                               /* 24*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                               /* 29*/ MatSetUp_MPISBAIJ,
                                       0,
                                       0,
                                       0,
                                       0,
                               /* 34*/ MatDuplicate_MPISBAIJ,
                                       0,
                                       0,
                                       0,
                                       0,
                               /* 39*/ MatAXPY_MPISBAIJ,
                                       MatGetSubMatrices_MPISBAIJ,
                                       MatIncreaseOverlap_MPISBAIJ,
                                       MatGetValues_MPISBAIJ,
                                       MatCopy_MPISBAIJ,
                               /* 44*/ 0,
                                       MatScale_MPISBAIJ,
                                       MatShift_MPISBAIJ,
                                       0,
                                       0,
                               /* 49*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                               /* 54*/ 0,
                                       0,
                                       MatSetUnfactored_MPISBAIJ,
                                       0,
                                       MatSetValuesBlocked_MPISBAIJ,
                               /* 59*/ MatGetSubMatrix_MPISBAIJ,
                                       0,
                                       0,
                                       0,
                                       0,
                               /* 64*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                               /* 69*/ MatGetRowMaxAbs_MPISBAIJ,
                                       0,
                                       0,
                                       0,
                                       0,
                               /* 74*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                               /* 79*/ 0,
                                       0,
                                       0,
                                       0,
                                       MatLoad_MPISBAIJ,
                               /* 84*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                               /* 89*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                               /* 94*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                               /* 99*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                               /*104*/ 0,
                                       MatRealPart_MPISBAIJ,
                                       MatImaginaryPart_MPISBAIJ,
                                       MatGetRowUpperTriangular_MPISBAIJ,
                                       MatRestoreRowUpperTriangular_MPISBAIJ,
                               /*109*/ 0,
                                       0,
                                       0,
                                       0,
                                       MatMissingDiagonal_MPISBAIJ,
                               /*114*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                               /*119*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                               /*124*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                               /*129*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                               /*134*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                               /*139*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                               /*144*/ MatCreateMPIMatConcatenateSeqMat_MPISBAIJ
};

#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonalBlock_MPISBAIJ"
PetscErrorCode MatGetDiagonalBlock_MPISBAIJ(Mat A,Mat *a)
{
  PetscFunctionBegin;
  *a = ((Mat_MPISBAIJ*)A->data)->A;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMPISBAIJSetPreallocation_MPISBAIJ"
PetscErrorCode MatMPISBAIJSetPreallocation_MPISBAIJ(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt *d_nnz,PetscInt o_nz,const PetscInt *o_nnz)
{
  Mat_MPISBAIJ   *b;
  PetscErrorCode ierr;
  PetscInt       i,mbs,Mbs;

  PetscFunctionBegin;
  ierr = MatSetBlockSize(B,PetscAbs(bs));CHKERRQ(ierr);
  ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
  ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);

  b   = (Mat_MPISBAIJ*)B->data;
  mbs = B->rmap->n/bs;
  Mbs = B->rmap->N/bs;
  if (mbs*bs != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"No of local rows %D must be divisible by blocksize %D",B->rmap->n,bs);

  B->rmap->bs = bs;
  b->bs2      = bs*bs;
  b->mbs      = mbs;
  b->Mbs      = Mbs;
  b->nbs      = B->cmap->n/bs;
  b->Nbs      = B->cmap->N/bs;

  for (i=0; i<=b->size; i++) {
    b->rangebs[i] = B->rmap->range[i]/bs;
  }
  b->rstartbs = B->rmap->rstart/bs;
  b->rendbs   = B->rmap->rend/bs;

  b->cstartbs = B->cmap->rstart/bs;
  b->cendbs   = B->cmap->rend/bs;

  if (!B->preallocated) {
    ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
    ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
    ierr = MatSetType(b->A,MATSEQSBAIJ);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr);
    ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
    ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
    ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr);
    ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),bs,&B->bstash);CHKERRQ(ierr);
  }

  ierr = MatSeqSBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr);
  ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr);

  B->preallocated = PETSC_TRUE;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMPISBAIJSetPreallocationCSR_MPISBAIJ"
PetscErrorCode MatMPISBAIJSetPreallocationCSR_MPISBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
{
  PetscInt       m,rstart,cstart,cend;
  PetscInt       i,j,d,nz,nz_max = 0,*d_nnz = 0,*o_nnz = 0;
  const PetscInt *JJ     = 0;
  PetscScalar    *values = 0;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (bs < 1) SETERRQ1(PetscObjectComm((PetscObject)B),PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs);
  ierr   = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
  ierr   = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
  ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
  ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
  ierr   = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
  m      = B->rmap->n/bs;
  rstart = B->rmap->rstart/bs;
  cstart = B->cmap->rstart/bs;
  cend   = B->cmap->rend/bs;

  if (ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %D",ii[0]);
  ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr);
  for (i=0; i<m; i++) {
    nz = ii[i+1] - ii[i];
    if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nz);
    nz_max = PetscMax(nz_max,nz);
    JJ     = jj + ii[i];
    for (j=0; j<nz; j++) {
      if (*JJ >= cstart) break;
      JJ++;
    }
    d = 0;
    for (; j<nz; j++) {
      if (*JJ++ >= cend) break;
      d++;
    }
    d_nnz[i] = d;
    o_nnz[i] = nz - d;
  }
  ierr = MatMPISBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
  ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);

  values = (PetscScalar*)V;
  if (!values) {
    ierr = PetscMalloc1(bs*bs*nz_max,&values);CHKERRQ(ierr);
    ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr);
  }
  for (i=0; i<m; i++) {
    PetscInt          row   = i + rstart;
    PetscInt          ncols = ii[i+1] - ii[i];
    const PetscInt    *icols = jj + ii[i];
    const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
    ierr = MatSetValuesBlocked_MPISBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr);
  }

  if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); }
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*MC
   MATMPISBAIJ - MATMPISBAIJ = "mpisbaij" - A matrix type to be used for distributed symmetric sparse block matrices,
   based on the block compressed sparse row format. Only the upper triangular portion of the "diagonal" portion of
   the matrix is stored.

   For complex numbers by default this matrix is symmetric, NOT Hermitian symmetric. To make it Hermitian symmetric you
   can call MatSetOption(mat,MAT_HERMITIAN,PETSC_TRUE);

   Options Database Keys:
. -mat_type mpisbaij - sets the matrix type to "mpisbaij" during a call to MatSetFromOptions()

  Level: beginner

.seealso: MatCreateSBAIJ()
M*/
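/*
   A usage sketch (not compiled as part of this file): the recommended way to obtain an
   MPISBAIJ matrix; the sizes and preallocation counts below are hypothetical.

      Mat A;
      ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
      ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,100,100);CHKERRQ(ierr);
      ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
      ierr = MatMPISBAIJSetPreallocation(A,2,5,NULL,2,NULL);CHKERRQ(ierr);
*/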
PetscObjectComposeFunction((PetscObject)B,"MatMPISBAIJSetPreallocation_C",MatMPISBAIJSetPreallocation_MPISBAIJ);CHKERRQ(ierr); 2134 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPISBAIJSetPreallocationCSR_C",MatMPISBAIJSetPreallocationCSR_MPISBAIJ);CHKERRQ(ierr); 2135 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpisbaij_mpisbstrm_C",MatConvert_MPISBAIJ_MPISBSTRM);CHKERRQ(ierr); 2136 #if defined(PETSC_HAVE_ELEMENTAL) 2137 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpisbaij_elemental_C",MatConvert_MPISBAIJ_Elemental);CHKERRQ(ierr); 2138 #endif 2139 2140 B->symmetric = PETSC_TRUE; 2141 B->structurally_symmetric = PETSC_TRUE; 2142 B->symmetric_set = PETSC_TRUE; 2143 B->structurally_symmetric_set = PETSC_TRUE; 2144 2145 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPISBAIJ);CHKERRQ(ierr); 2146 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Options for loading MPISBAIJ matrix 1","Mat");CHKERRQ(ierr); 2147 ierr = PetscOptionsBool("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",flg,&flg,NULL);CHKERRQ(ierr); 2148 if (flg) { 2149 PetscReal fact = 1.39; 2150 ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr); 2151 ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,NULL);CHKERRQ(ierr); 2152 if (fact <= 1.0) fact = 1.39; 2153 ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr); 2154 ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr); 2155 } 2156 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2157 PetscFunctionReturn(0); 2158 } 2159 2160 /*MC 2161 MATSBAIJ - MATSBAIJ = "sbaij" - A matrix type to be used for symmetric block sparse matrices. 2162 2163 This matrix type is identical to MATSEQSBAIJ when constructed with a single process communicator, 2164 and MATMPISBAIJ otherwise. 2165 2166 Options Database Keys: 2167 . -mat_type sbaij - sets the matrix type to "sbaij" during a call to MatSetFromOptions() 2168 2169 Level: beginner 2170 2171 .seealso: MatCreateMPISBAIJ,MATSEQSBAIJ,MATMPISBAIJ 2172 M*/ 2173 2174 #undef __FUNCT__ 2175 #define __FUNCT__ "MatMPISBAIJSetPreallocation" 2176 /*@C 2177 MatMPISBAIJSetPreallocation - For good matrix assembly performance 2178 the user should preallocate the matrix storage by setting the parameters 2179 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2180 performance can be increased by more than a factor of 50. 2181 2182 Collective on Mat 2183 2184 Input Parameters: 2185 + B - the matrix 2186 . bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row 2187 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs() 2188 . d_nz - number of block nonzeros per block row in diagonal portion of local 2189 submatrix (same for all local rows) 2190 . d_nnz - array containing the number of block nonzeros in the various block rows 2191 in the upper triangular and diagonal part of the in diagonal portion of the local 2192 (possibly different for each block row) or NULL. If you plan to factor the matrix you must leave room 2193 for the diagonal entry and set a value even if it is zero. 2194 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 2195 submatrix (same for all local rows). 

#undef __FUNCT__
#define __FUNCT__ "MatMPISBAIJSetPreallocation"
/*@C
   MatMPISBAIJSetPreallocation - For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective on Mat

   Input Parameters:
+  B - the matrix
.  bs    - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
.  d_nz  - number of block nonzeros per block row in the diagonal portion of the local
           submatrix (same for all local rows)
.  d_nnz - array containing the number of block nonzeros in the various block rows
           in the upper triangular and diagonal part of the diagonal portion of the local
           submatrix (possibly different for each block row) or NULL. If you plan to factor the matrix you must leave room
           for the diagonal entry and set a value even if it is zero.
.  o_nz  - number of block nonzeros per block row in the off-diagonal portion of the local
           submatrix (same for all local rows).
-  o_nnz - array containing the number of nonzeros in the various block rows of the
           off-diagonal portion of the local submatrix that is right of the diagonal
           (possibly different for each block row) or NULL.

   Options Database Keys:
.   -mat_no_unroll - uses code that does not unroll the loops in the
                     block calculations (much slower)
.   -mat_block_size - size of the blocks to use

   Notes:

   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
   then it must be used on all processors that share the object for that argument.

   If the *_nnz parameter is given then the *_nz parameter is ignored.

   Storage Information:
   For a square global matrix we define each processor's diagonal portion
   to be its local rows and the corresponding columns (a square submatrix);
   each processor's off-diagonal portion encompasses the remainder of the
   local matrix (a rectangular submatrix).

   The user can specify preallocated storage for the diagonal part of
   the local submatrix with either d_nz or d_nnz (not both). Set
   d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
   memory allocation. Likewise, specify preallocated storage for the
   off-diagonal part of the local submatrix with o_nz or o_nnz (not both).

   You can call MatGetInfo() to get information on how effective the preallocation was,
   for example the fields mallocs, nz_allocated, nz_used, nz_unneeded.
   You can also run with the option -info and look for messages with the string
   malloc in them to see if additional memory allocation was needed.

   Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
   the figure below we depict these three local rows and all columns (0-11).

.vb
           0 1 2 3 4 5 6 7 8 9 10 11
          --------------------------
   row 3  |. . . d d d o o o o  o  o
   row 4  |. . . d d d o o o o  o  o
   row 5  |. . . d d d o o o o  o  o
          --------------------------
.ve

   Thus, any entries in the d locations are stored in the d (diagonal)
   submatrix, and any entries in the o locations are stored in the
   o (off-diagonal) submatrix. Note that the d matrix is stored in
   MATSEQSBAIJ format and the o submatrix in MATSEQBAIJ format.

   Now d_nz should indicate the number of block nonzeros per row in the upper triangular
   plus the diagonal part of the d matrix,
   and o_nz should indicate the number of block nonzeros per row in the o matrix.

   In general, for PDE problems in which most nonzeros are near the diagonal,
   one expects d_nz >> o_nz. For large problems you MUST preallocate memory
   or you will get TERRIBLE performance; see the users' manual chapter on
   matrices.

   Level: intermediate

.keywords: matrix, block, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateBAIJ(), PetscSplitOwnership()
@*/
PetscErrorCode MatMPISBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(B,MAT_CLASSID,1);
  PetscValidType(B,1);
  PetscValidLogicalCollectiveInt(B,bs,2);
  ierr = PetscTryMethod(B,"MatMPISBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,bs,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
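/*
   A usage sketch (not compiled as part of this file): per-block-row preallocation for a
   process owning two block rows; the counts are hypothetical. For this symmetric format
   d_nnz[i] counts only the diagonal block and the blocks to its right.

      PetscInt d_nnz[2] = {2,1};
      PetscInt o_nnz[2] = {1,0};
      ierr = MatMPISBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
*/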

#undef __FUNCT__
#define __FUNCT__ "MatCreateSBAIJ"
/*@C
   MatCreateSBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format
   (block compressed row). For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  bs    - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
.  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
      This value should be the same as the local size used in creating the
      y vector for the matrix-vector product y = Ax.
.  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
      This value should be the same as the local size used in creating the
      x vector for the matrix-vector product y = Ax.
.  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.  d_nz  - number of block nonzeros per block row in the diagonal portion of the local
           submatrix (same for all local rows)
.  d_nnz - array containing the number of block nonzeros in the various block rows
           in the upper triangular portion of the diagonal portion of the local
           submatrix (possibly different for each block row) or NULL.
           If you plan to factor the matrix you must leave room for the diagonal entry and
           set its value even if it is zero.
.  o_nz  - number of block nonzeros per block row in the off-diagonal portion of the local
           submatrix (same for all local rows).
-  o_nnz - array containing the number of nonzeros in the various block rows of the
           off-diagonal portion of the local submatrix (possibly different for
           each block row) or NULL.

   Output Parameter:
.  A - the matrix

   Options Database Keys:
.   -mat_no_unroll - uses code that does not unroll the loops in the
                     block calculations (much slower)
.   -mat_block_size - size of the blocks to use
.   -mat_mpi - use the parallel matrix data structures even on one processor
               (defaults to using SeqBAIJ format on one processor)

   It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
   MatXXXXSetPreallocation() paradigm instead of this routine directly.
   [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]

   Notes:
   The number of rows and columns must be divisible by the blocksize.
   This matrix type does not support complex Hermitian operations.

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
   then it must be used on all processors that share the object for that argument.

   If the *_nnz parameter is given then the *_nz parameter is ignored.

   Storage Information:
   For a square global matrix we define each processor's diagonal portion
   to be its local rows and the corresponding columns (a square submatrix);
   each processor's off-diagonal portion encompasses the remainder of the
   local matrix (a rectangular submatrix).

   The user can specify preallocated storage for the diagonal part of
   the local submatrix with either d_nz or d_nnz (not both). Set
   d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
   memory allocation. Likewise, specify preallocated storage for the
   off-diagonal part of the local submatrix with o_nz or o_nnz (not both).

   Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
   the figure below we depict these three local rows and all columns (0-11).

.vb
           0 1 2 3 4 5 6 7 8 9 10 11
          --------------------------
   row 3  |. . . d d d o o o o  o  o
   row 4  |. . . d d d o o o o  o  o
   row 5  |. . . d d d o o o o  o  o
          --------------------------
.ve

   Thus, any entries in the d locations are stored in the d (diagonal)
   submatrix, and any entries in the o locations are stored in the
   o (off-diagonal) submatrix. Note that the d matrix is stored in
   MATSEQSBAIJ format and the o submatrix in MATSEQBAIJ format.

   Now d_nz should indicate the number of block nonzeros per row in the upper triangular
   plus the diagonal part of the d matrix,
   and o_nz should indicate the number of block nonzeros per row in the o matrix.
   In general, for PDE problems in which most nonzeros are near the diagonal,
   one expects d_nz >> o_nz. For large problems you MUST preallocate memory
   or you will get TERRIBLE performance; see the users' manual chapter on
   matrices.
2372 2373 Level: intermediate 2374 2375 .keywords: matrix, block, aij, compressed row, sparse, parallel 2376 2377 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateBAIJ() 2378 @*/ 2379 2380 PetscErrorCode MatCreateSBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 2381 { 2382 PetscErrorCode ierr; 2383 PetscMPIInt size; 2384 2385 PetscFunctionBegin; 2386 ierr = MatCreate(comm,A);CHKERRQ(ierr); 2387 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 2388 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2389 if (size > 1) { 2390 ierr = MatSetType(*A,MATMPISBAIJ);CHKERRQ(ierr); 2391 ierr = MatMPISBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 2392 } else { 2393 ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr); 2394 ierr = MatSeqSBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr); 2395 } 2396 PetscFunctionReturn(0); 2397 } 2398 2399 2400 #undef __FUNCT__ 2401 #define __FUNCT__ "MatDuplicate_MPISBAIJ" 2402 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 2403 { 2404 Mat mat; 2405 Mat_MPISBAIJ *a,*oldmat = (Mat_MPISBAIJ*)matin->data; 2406 PetscErrorCode ierr; 2407 PetscInt len=0,nt,bs=matin->rmap->bs,mbs=oldmat->mbs; 2408 PetscScalar *array; 2409 2410 PetscFunctionBegin; 2411 *newmat = 0; 2412 2413 ierr = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr); 2414 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 2415 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 2416 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 2417 ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr); 2418 ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr); 2419 2420 mat->factortype = matin->factortype; 2421 mat->preallocated = PETSC_TRUE; 2422 mat->assembled = PETSC_TRUE; 2423 mat->insertmode = NOT_SET_VALUES; 2424 2425 a = (Mat_MPISBAIJ*)mat->data; 2426 a->bs2 = oldmat->bs2; 2427 a->mbs = oldmat->mbs; 2428 a->nbs = oldmat->nbs; 2429 a->Mbs = oldmat->Mbs; 2430 a->Nbs = oldmat->Nbs; 2431 2432 2433 a->size = oldmat->size; 2434 a->rank = oldmat->rank; 2435 a->donotstash = oldmat->donotstash; 2436 a->roworiented = oldmat->roworiented; 2437 a->rowindices = 0; 2438 a->rowvalues = 0; 2439 a->getrowactive = PETSC_FALSE; 2440 a->barray = 0; 2441 a->rstartbs = oldmat->rstartbs; 2442 a->rendbs = oldmat->rendbs; 2443 a->cstartbs = oldmat->cstartbs; 2444 a->cendbs = oldmat->cendbs; 2445 2446 /* hash table stuff */ 2447 a->ht = 0; 2448 a->hd = 0; 2449 a->ht_size = 0; 2450 a->ht_flag = oldmat->ht_flag; 2451 a->ht_fact = oldmat->ht_fact; 2452 a->ht_total_ct = 0; 2453 a->ht_insert_ct = 0; 2454 2455 ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+2)*sizeof(PetscInt));CHKERRQ(ierr); 2456 if (oldmat->colmap) { 2457 #if defined(PETSC_USE_CTABLE) 2458 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 2459 #else 2460 ierr = PetscMalloc1(a->Nbs,&a->colmap);CHKERRQ(ierr); 2461 ierr = PetscLogObjectMemory((PetscObject)mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr); 2462 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr); 2463 #endif 2464 } else a->colmap = 0; 2465 2466 if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) { 2467 ierr = PetscMalloc1(len,&a->garray);CHKERRQ(ierr); 2468 ierr = 
PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr); 2469 ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); 2470 } else a->garray = 0; 2471 2472 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)matin),matin->rmap->bs,&mat->bstash);CHKERRQ(ierr); 2473 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 2474 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr); 2475 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 2476 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr); 2477 2478 ierr = VecDuplicate(oldmat->slvec0,&a->slvec0);CHKERRQ(ierr); 2479 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0);CHKERRQ(ierr); 2480 ierr = VecDuplicate(oldmat->slvec1,&a->slvec1);CHKERRQ(ierr); 2481 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1);CHKERRQ(ierr); 2482 2483 ierr = VecGetLocalSize(a->slvec1,&nt);CHKERRQ(ierr); 2484 ierr = VecGetArray(a->slvec1,&array);CHKERRQ(ierr); 2485 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,bs*mbs,array,&a->slvec1a);CHKERRQ(ierr); 2486 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,nt-bs*mbs,array+bs*mbs,&a->slvec1b);CHKERRQ(ierr); 2487 ierr = VecRestoreArray(a->slvec1,&array);CHKERRQ(ierr); 2488 ierr = VecGetArray(a->slvec0,&array);CHKERRQ(ierr); 2489 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,nt-bs*mbs,array+bs*mbs,&a->slvec0b);CHKERRQ(ierr); 2490 ierr = VecRestoreArray(a->slvec0,&array);CHKERRQ(ierr); 2491 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0);CHKERRQ(ierr); 2492 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1);CHKERRQ(ierr); 2493 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0b);CHKERRQ(ierr); 2494 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1a);CHKERRQ(ierr); 2495 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1b);CHKERRQ(ierr); 2496 2497 /* ierr = VecScatterCopy(oldmat->sMvctx,&a->sMvctx); - not written yet, replaced by the lazy trick: */ 2498 ierr = PetscObjectReference((PetscObject)oldmat->sMvctx);CHKERRQ(ierr); 2499 a->sMvctx = oldmat->sMvctx; 2500 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->sMvctx);CHKERRQ(ierr); 2501 2502 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 2503 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr); 2504 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 2505 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr); 2506 ierr = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 2507 *newmat = mat; 2508 PetscFunctionReturn(0); 2509 } 2510 2511 #undef __FUNCT__ 2512 #define __FUNCT__ "MatLoad_MPISBAIJ" 2513 PetscErrorCode MatLoad_MPISBAIJ(Mat newmat,PetscViewer viewer) 2514 { 2515 PetscErrorCode ierr; 2516 PetscInt i,nz,j,rstart,rend; 2517 PetscScalar *vals,*buf; 2518 MPI_Comm comm; 2519 MPI_Status status; 2520 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag,*sndcounts = 0,*browners,maxnz,*rowners,mmbs; 2521 PetscInt header[4],*rowlengths = 0,M,N,m,*cols,*locrowlens; 2522 PetscInt *procsnz = 0,jj,*mycols,*ibuf; 2523 PetscInt bs = newmat->rmap->bs,Mbs,mbs,extra_rows; 2524 PetscInt *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount; 2525 PetscInt dcount,kmax,k,nzcount,tmp; 2526 int fd; 2527 2528 PetscFunctionBegin; 2529 /* force binary viewer to load .info file if it has not yet done so */ 2530 ierr = 
PetscViewerSetUp(viewer);CHKERRQ(ierr); 2531 ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr); 2532 ierr = PetscOptionsBegin(comm,NULL,"Options for loading MPISBAIJ matrix 2","Mat");CHKERRQ(ierr); 2533 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr); 2534 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2535 if (bs < 0) bs = 1; 2536 2537 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2538 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2539 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2540 if (!rank) { 2541 ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr); 2542 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 2543 if (header[3] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as MPISBAIJ"); 2544 } 2545 2546 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 2547 M = header[1]; 2548 N = header[2]; 2549 2550 /* If global sizes are set, check if they are consistent with that given in the file */ 2551 if (newmat->rmap->N >= 0 && newmat->rmap->N != M) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%D) and input matrix has (%D)",newmat->rmap->N,M); 2552 if (newmat->cmap->N >= 0 && newmat->cmap->N != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%D) and input matrix has (%D)",newmat->cmap->N,N); 2553 2554 if (M != N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only do square matrices"); 2555 2556 /* 2557 This code adds extra rows to make sure the number of rows is 2558 divisible by the blocksize 2559 */ 2560 Mbs = M/bs; 2561 extra_rows = bs - M + bs*(Mbs); 2562 if (extra_rows == bs) extra_rows = 0; 2563 else Mbs++; 2564 if (extra_rows &&!rank) { 2565 ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr); 2566 } 2567 2568 /* determine ownership of all rows */ 2569 if (newmat->rmap->n < 0) { /* PETSC_DECIDE */ 2570 mbs = Mbs/size + ((Mbs % size) > rank); 2571 m = mbs*bs; 2572 } else { /* User Set */ 2573 m = newmat->rmap->n; 2574 mbs = m/bs; 2575 } 2576 ierr = PetscMalloc2(size+1,&rowners,size+1,&browners);CHKERRQ(ierr); 2577 ierr = PetscMPIIntCast(mbs,&mmbs);CHKERRQ(ierr); 2578 ierr = MPI_Allgather(&mmbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 2579 rowners[0] = 0; 2580 for (i=2; i<=size; i++) rowners[i] += rowners[i-1]; 2581 for (i=0; i<=size; i++) browners[i] = rowners[i]*bs; 2582 rstart = rowners[rank]; 2583 rend = rowners[rank+1]; 2584 2585 /* distribute row lengths to all processors */ 2586 ierr = PetscMalloc1((rend-rstart)*bs,&locrowlens);CHKERRQ(ierr); 2587 if (!rank) { 2588 ierr = PetscMalloc1(M+extra_rows,&rowlengths);CHKERRQ(ierr); 2589 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 2590 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 2591 ierr = PetscMalloc1(size,&sndcounts);CHKERRQ(ierr); 2592 for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i]; 2593 ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr); 2594 ierr = PetscFree(sndcounts);CHKERRQ(ierr); 2595 } else { 2596 ierr = MPI_Scatterv(0,0,0,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr); 2597 } 2598 2599 if (!rank) { /* procs[0] */ 2600 /* calculate the number of nonzeros on each processor */ 2601 ierr = 
PetscMalloc1(size,&procsnz);CHKERRQ(ierr); 2602 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 2603 for (i=0; i<size; i++) { 2604 for (j=rowners[i]*bs; j< rowners[i+1]*bs; j++) { 2605 procsnz[i] += rowlengths[j]; 2606 } 2607 } 2608 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2609 2610 /* determine max buffer needed and allocate it */ 2611 maxnz = 0; 2612 for (i=0; i<size; i++) { 2613 maxnz = PetscMax(maxnz,procsnz[i]); 2614 } 2615 ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr); 2616 2617 /* read in my part of the matrix column indices */ 2618 nz = procsnz[0]; 2619 ierr = PetscMalloc1(nz,&ibuf);CHKERRQ(ierr); 2620 mycols = ibuf; 2621 if (size == 1) nz -= extra_rows; 2622 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 2623 if (size == 1) { 2624 for (i=0; i< extra_rows; i++) mycols[nz+i] = M+i; 2625 } 2626 2627 /* read in every ones (except the last) and ship off */ 2628 for (i=1; i<size-1; i++) { 2629 nz = procsnz[i]; 2630 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2631 ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2632 } 2633 /* read in the stuff for the last proc */ 2634 if (size != 1) { 2635 nz = procsnz[size-1] - extra_rows; /* the extra rows are not on the disk */ 2636 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2637 for (i=0; i<extra_rows; i++) cols[nz+i] = M+i; 2638 ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr); 2639 } 2640 ierr = PetscFree(cols);CHKERRQ(ierr); 2641 } else { /* procs[i], i>0 */ 2642 /* determine buffer space needed for message */ 2643 nz = 0; 2644 for (i=0; i<m; i++) nz += locrowlens[i]; 2645 ierr = PetscMalloc1(nz,&ibuf);CHKERRQ(ierr); 2646 mycols = ibuf; 2647 /* receive message of column indices*/ 2648 ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 2649 ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr); 2650 if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2651 } 2652 2653 /* loop over local rows, determining number of off diagonal entries */ 2654 ierr = PetscMalloc2(rend-rstart,&dlens,rend-rstart,&odlens);CHKERRQ(ierr); 2655 ierr = PetscMalloc3(Mbs,&mask,Mbs,&masked1,Mbs,&masked2);CHKERRQ(ierr); 2656 ierr = PetscMemzero(mask,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 2657 ierr = PetscMemzero(masked1,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 2658 ierr = PetscMemzero(masked2,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 2659 rowcount = 0; 2660 nzcount = 0; 2661 for (i=0; i<mbs; i++) { 2662 dcount = 0; 2663 odcount = 0; 2664 for (j=0; j<bs; j++) { 2665 kmax = locrowlens[rowcount]; 2666 for (k=0; k<kmax; k++) { 2667 tmp = mycols[nzcount++]/bs; /* block col. 
index */ 2668 if (!mask[tmp]) { 2669 mask[tmp] = 1; 2670 if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; /* entry in off-diag portion */ 2671 else masked1[dcount++] = tmp; /* entry in diag portion */ 2672 } 2673 } 2674 rowcount++; 2675 } 2676 2677 dlens[i] = dcount; /* d_nzz[i] */ 2678 odlens[i] = odcount; /* o_nzz[i] */ 2679 2680 /* zero out the mask elements we set */ 2681 for (j=0; j<dcount; j++) mask[masked1[j]] = 0; 2682 for (j=0; j<odcount; j++) mask[masked2[j]] = 0; 2683 } 2684 ierr = MatSetSizes(newmat,m,m,M+extra_rows,N+extra_rows);CHKERRQ(ierr); 2685 ierr = MatMPISBAIJSetPreallocation(newmat,bs,0,dlens,0,odlens);CHKERRQ(ierr); 2686 ierr = MatSetOption(newmat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE);CHKERRQ(ierr); 2687 2688 if (!rank) { 2689 ierr = PetscMalloc1(maxnz,&buf);CHKERRQ(ierr); 2690 /* read in my part of the matrix numerical values */ 2691 nz = procsnz[0]; 2692 vals = buf; 2693 mycols = ibuf; 2694 if (size == 1) nz -= extra_rows; 2695 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2696 if (size == 1) { 2697 for (i=0; i< extra_rows; i++) vals[nz+i] = 1.0; 2698 } 2699 2700 /* insert into matrix */ 2701 jj = rstart*bs; 2702 for (i=0; i<m; i++) { 2703 ierr = MatSetValues(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 2704 mycols += locrowlens[i]; 2705 vals += locrowlens[i]; 2706 jj++; 2707 } 2708 2709 /* read in other processors (except the last one) and ship out */ 2710 for (i=1; i<size-1; i++) { 2711 nz = procsnz[i]; 2712 vals = buf; 2713 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2714 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr); 2715 } 2716 /* the last proc */ 2717 if (size != 1) { 2718 nz = procsnz[i] - extra_rows; 2719 vals = buf; 2720 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2721 for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0; 2722 ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr); 2723 } 2724 ierr = PetscFree(procsnz);CHKERRQ(ierr); 2725 2726 } else { 2727 /* receive numeric values */ 2728 ierr = PetscMalloc1(nz,&buf);CHKERRQ(ierr); 2729 2730 /* receive message of values*/ 2731 vals = buf; 2732 mycols = ibuf; 2733 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm,&status);CHKERRQ(ierr); 2734 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 2735 if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2736 2737 /* insert into matrix */ 2738 jj = rstart*bs; 2739 for (i=0; i<m; i++) { 2740 ierr = MatSetValues_MPISBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 2741 mycols += locrowlens[i]; 2742 vals += locrowlens[i]; 2743 jj++; 2744 } 2745 } 2746 2747 ierr = PetscFree(locrowlens);CHKERRQ(ierr); 2748 ierr = PetscFree(buf);CHKERRQ(ierr); 2749 ierr = PetscFree(ibuf);CHKERRQ(ierr); 2750 ierr = PetscFree2(rowners,browners);CHKERRQ(ierr); 2751 ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr); 2752 ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr); 2753 ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2754 ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2755 PetscFunctionReturn(0); 2756 } 2757 2758 #undef __FUNCT__ 2759 #define __FUNCT__ "MatMPISBAIJSetHashTableFactor" 2760 /*XXXXX@ 2761 MatMPISBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable. 2762 2763 Input Parameters: 2764 . mat - the matrix 2765 . 
fact - factor 2766 2767 Not Collective on Mat, each process can have a different hash factor 2768 2769 Level: advanced 2770 2771 Notes: 2772 This can also be set by the command line option: -mat_use_hash_table fact 2773 2774 .keywords: matrix, hashtable, factor, HT 2775 2776 .seealso: MatSetOption() 2777 @XXXXX*/ 2778 2779 2780 #undef __FUNCT__ 2781 #define __FUNCT__ "MatGetRowMaxAbs_MPISBAIJ" 2782 PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat A,Vec v,PetscInt idx[]) 2783 { 2784 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 2785 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(a->B)->data; 2786 PetscReal atmp; 2787 PetscReal *work,*svalues,*rvalues; 2788 PetscErrorCode ierr; 2789 PetscInt i,bs,mbs,*bi,*bj,brow,j,ncols,krow,kcol,col,row,Mbs,bcol; 2790 PetscMPIInt rank,size; 2791 PetscInt *rowners_bs,dest,count,source; 2792 PetscScalar *va; 2793 MatScalar *ba; 2794 MPI_Status stat; 2795 2796 PetscFunctionBegin; 2797 if (idx) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Send email to petsc-maint@mcs.anl.gov"); 2798 ierr = MatGetRowMaxAbs(a->A,v,NULL);CHKERRQ(ierr); 2799 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2800 2801 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr); 2802 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr); 2803 2804 bs = A->rmap->bs; 2805 mbs = a->mbs; 2806 Mbs = a->Mbs; 2807 ba = b->a; 2808 bi = b->i; 2809 bj = b->j; 2810 2811 /* find ownerships */ 2812 rowners_bs = A->rmap->range; 2813 2814 /* each proc creates an array to be distributed */ 2815 ierr = PetscMalloc1(bs*Mbs,&work);CHKERRQ(ierr); 2816 ierr = PetscMemzero(work,bs*Mbs*sizeof(PetscReal));CHKERRQ(ierr); 2817 2818 /* row_max for B */ 2819 if (rank != size-1) { 2820 for (i=0; i<mbs; i++) { 2821 ncols = bi[1] - bi[0]; bi++; 2822 brow = bs*i; 2823 for (j=0; j<ncols; j++) { 2824 bcol = bs*(*bj); 2825 for (kcol=0; kcol<bs; kcol++) { 2826 col = bcol + kcol; /* local col index */ 2827 col += rowners_bs[rank+1]; /* global col index */ 2828 for (krow=0; krow<bs; krow++) { 2829 atmp = PetscAbsScalar(*ba); ba++; 2830 row = brow + krow; /* local row index */ 2831 if (PetscRealPart(va[row]) < atmp) va[row] = atmp; 2832 if (work[col] < atmp) work[col] = atmp; 2833 } 2834 } 2835 bj++; 2836 } 2837 } 2838 2839 /* send values to its owners */ 2840 for (dest=rank+1; dest<size; dest++) { 2841 svalues = work + rowners_bs[dest]; 2842 count = rowners_bs[dest+1]-rowners_bs[dest]; 2843 ierr = MPI_Send(svalues,count,MPIU_REAL,dest,rank,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2844 } 2845 } 2846 2847 /* receive values */ 2848 if (rank) { 2849 rvalues = work; 2850 count = rowners_bs[rank+1]-rowners_bs[rank]; 2851 for (source=0; source<rank; source++) { 2852 ierr = MPI_Recv(rvalues,count,MPIU_REAL,MPI_ANY_SOURCE,MPI_ANY_TAG,PetscObjectComm((PetscObject)A),&stat);CHKERRQ(ierr); 2853 /* process values */ 2854 for (i=0; i<count; i++) { 2855 if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i]; 2856 } 2857 } 2858 } 2859 2860 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2861 ierr = PetscFree(work);CHKERRQ(ierr); 2862 PetscFunctionReturn(0); 2863 } 2864 2865 #undef __FUNCT__ 2866 #define __FUNCT__ "MatSOR_MPISBAIJ" 2867 PetscErrorCode MatSOR_MPISBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 2868 { 2869 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data; 2870 PetscErrorCode ierr; 2871 PetscInt mbs=mat->mbs,bs=matin->rmap->bs; 2872 PetscScalar *x,*ptr,*from; 2873 Vec bb1; 2874 const PetscScalar *b; 2875 2876 PetscFunctionBegin; 2877 if (its <= 0 || lits <= 0) 
SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 2878 if (bs > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented"); 2879 2880 if (flag == SOR_APPLY_UPPER) { 2881 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2882 PetscFunctionReturn(0); 2883 } 2884 2885 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 2886 if (flag & SOR_ZERO_INITIAL_GUESS) { 2887 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr); 2888 its--; 2889 } 2890 2891 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 2892 while (its--) { 2893 2894 /* lower triangular part: slvec0b = - B^T*xx */ 2895 ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr); 2896 2897 /* copy xx into slvec0a */ 2898 ierr = VecGetArray(mat->slvec0,&ptr);CHKERRQ(ierr); 2899 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 2900 ierr = PetscMemcpy(ptr,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 2901 ierr = VecRestoreArray(mat->slvec0,&ptr);CHKERRQ(ierr); 2902 2903 ierr = VecScale(mat->slvec0,-1.0);CHKERRQ(ierr); 2904 2905 /* copy bb into slvec1a */ 2906 ierr = VecGetArray(mat->slvec1,&ptr);CHKERRQ(ierr); 2907 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 2908 ierr = PetscMemcpy(ptr,b,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 2909 ierr = VecRestoreArray(mat->slvec1,&ptr);CHKERRQ(ierr); 2910 2911 /* set slvec1b = 0 */ 2912 ierr = VecSet(mat->slvec1b,0.0);CHKERRQ(ierr); 2913 2914 ierr = VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2915 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 2916 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 2917 ierr = VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2918 2919 /* upper triangular part: bb1 = bb1 - B*x */ 2920 ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,bb1);CHKERRQ(ierr); 2921 2922 /* local diagonal sweep */ 2923 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr); 2924 } 2925 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 2926 } else if ((flag & SOR_LOCAL_FORWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) { 2927 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2928 } else if ((flag & SOR_LOCAL_BACKWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) { 2929 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2930 } else if (flag & SOR_EISENSTAT) { 2931 Vec xx1; 2932 PetscBool hasop; 2933 const PetscScalar *diag; 2934 PetscScalar *sl,scale = (omega - 2.0)/omega; 2935 PetscInt i,n; 2936 2937 if (!mat->xx1) { 2938 ierr = VecDuplicate(bb,&mat->xx1);CHKERRQ(ierr); 2939 ierr = VecDuplicate(bb,&mat->bb1);CHKERRQ(ierr); 2940 } 2941 xx1 = mat->xx1; 2942 bb1 = mat->bb1; 2943 2944 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr); 2945 2946 if (!mat->diag) { 2947 /* this is wrong for same matrix with new nonzero values */ 2948 ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr); 2949 ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr); 2950 } 2951 ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr); 2952 2953 if (hasop) { 2954 ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr); 2955 ierr = VecAYPX(mat->slvec1a,scale,bb);CHKERRQ(ierr); 2956 } else { 2957 /* 2958 These 
two lines are replaced by code that may be a bit faster for a good compiler 2959 ierr = VecPointwiseMult(mat->slvec1a,mat->diag,xx);CHKERRQ(ierr); 2960 ierr = VecAYPX(mat->slvec1a,scale,bb);CHKERRQ(ierr); 2961 */ 2962 ierr = VecGetArray(mat->slvec1a,&sl);CHKERRQ(ierr); 2963 ierr = VecGetArrayRead(mat->diag,&diag);CHKERRQ(ierr); 2964 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 2965 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 2966 ierr = VecGetLocalSize(xx,&n);CHKERRQ(ierr); 2967 if (omega == 1.0) { 2968 for (i=0; i<n; i++) sl[i] = b[i] - diag[i]*x[i]; 2969 ierr = PetscLogFlops(2.0*n);CHKERRQ(ierr); 2970 } else { 2971 for (i=0; i<n; i++) sl[i] = b[i] + scale*diag[i]*x[i]; 2972 ierr = PetscLogFlops(3.0*n);CHKERRQ(ierr); 2973 } 2974 ierr = VecRestoreArray(mat->slvec1a,&sl);CHKERRQ(ierr); 2975 ierr = VecRestoreArrayRead(mat->diag,&diag);CHKERRQ(ierr); 2976 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 2977 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 2978 } 2979 2980 /* multiply off-diagonal portion of matrix */ 2981 ierr = VecSet(mat->slvec1b,0.0);CHKERRQ(ierr); 2982 ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr); 2983 ierr = VecGetArray(mat->slvec0,&from);CHKERRQ(ierr); 2984 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 2985 ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 2986 ierr = VecRestoreArray(mat->slvec0,&from);CHKERRQ(ierr); 2987 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 2988 ierr = VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2989 ierr = VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2990 ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,mat->slvec1a);CHKERRQ(ierr); 2991 2992 /* local sweep */ 2993 ierr = (*mat->A->ops->sor)(mat->A,mat->slvec1a,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr); 2994 ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr); 2995 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format"); 2996 PetscFunctionReturn(0); 2997 } 2998 2999 #undef __FUNCT__ 3000 #define __FUNCT__ "MatSOR_MPISBAIJ_2comm" 3001 PetscErrorCode MatSOR_MPISBAIJ_2comm(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 3002 { 3003 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data; 3004 PetscErrorCode ierr; 3005 Vec lvec1,bb1; 3006 3007 PetscFunctionBegin; 3008 if (its <= 0 || lits <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 3009 if (matin->rmap->bs > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented"); 3010 3011 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 3012 if (flag & SOR_ZERO_INITIAL_GUESS) { 3013 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr); 3014 its--; 3015 } 3016 3017 ierr = VecDuplicate(mat->lvec,&lvec1);CHKERRQ(ierr); 3018 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 3019 while (its--) { 3020 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3021 3022 /* lower diagonal part: bb1 = bb - B^T*xx */ 3023 ierr = (*mat->B->ops->multtranspose)(mat->B,xx,lvec1);CHKERRQ(ierr); 3024 ierr = VecScale(lvec1,-1.0);CHKERRQ(ierr); 3025 3026 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3027 ierr = VecCopy(bb,bb1);CHKERRQ(ierr); 3028 ierr = 
VecScatterBegin(mat->Mvctx,lvec1,bb1,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);

      /* upper diagonal part: bb1 = bb1 - B*x */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);

      ierr = VecScatterEnd(mat->Mvctx,lvec1,bb1,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);

      /* diagonal sweep */
      ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
    }
    ierr = VecDestroy(&lvec1);CHKERRQ(ierr);
    ierr = VecDestroy(&bb1);CHKERRQ(ierr);
  } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatCreateMPISBAIJWithArrays"
/*@
     MatCreateMPISBAIJWithArrays - creates an MPI SBAIJ matrix using arrays that contain
     the local rows in standard CSR format.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  bs - the block size, only a block size of 1 is supported
.  m - number of local rows (Cannot be PETSC_DECIDE)
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
       calculated if N is given) For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.  i - row indices
.  j - column indices
-  a - matrix values

   Output Parameter:
.  mat - the matrix

   Level: intermediate

   Notes:
       The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
     thus you CANNOT change the matrix entries by changing the values of a[] after you have
     called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.

       The i and j indices are 0 based, and the i array holds the offsets into the local j array.

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
          MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
@*/
PetscErrorCode MatCreateMPISBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
  if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
  ierr = MatCreate(comm,mat);CHKERRQ(ierr);
  ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
  ierr = MatSetType(*mat,MATMPISBAIJ);CHKERRQ(ierr);
  ierr = MatMPISBAIJSetPreallocationCSR(*mat,bs,i,j,a);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
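/*
   A usage sketch (not compiled as part of this file): the process owning global rows 0
   and 1 passes them in CSR form with global column indices; with the symmetric format
   only entries on or above the diagonal should appear. The arrays are hypothetical.

      PetscInt    i[] = {0,2,3};
      PetscInt    j[] = {0,1,1};
      PetscScalar a[] = {4.0,-1.0,4.0};
      Mat         A;
      ierr = MatCreateMPISBAIJWithArrays(PETSC_COMM_WORLD,1,2,2,PETSC_DETERMINE,PETSC_DETERMINE,i,j,a,&A);CHKERRQ(ierr);
*/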
#undef __FUNCT__
#define __FUNCT__ "MatMPISBAIJSetPreallocationCSR"
/*@C
   MatMPISBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in SBAIJ format
   (symmetric block compressed row).

   Collective on MPI_Comm

   Input Parameters:
+  B - the matrix
.  bs - the block size
.  i - the indices into j for the start of each local row (starts with zero)
.  j - the column indices for each local row (starts with zero); these must be sorted for each row
-  v - optional values in the matrix

   Level: developer

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ
@*/
PetscErrorCode MatMPISBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscTryMethod(B,"MatMPISBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
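/*
   Illustrative sketch (not part of the PETSc source): unlike
   MatCreateMPISBAIJWithArrays() above, here the caller builds the Mat object
   first and then hands over the CSR arrays.  The sizes m and bs and the
   arrays i, j, v are hypothetical placeholders.

     Mat B;
     ierr = MatCreate(PETSC_COMM_WORLD,&B);CHKERRQ(ierr);
     ierr = MatSetSizes(B,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
     ierr = MatSetType(B,MATMPISBAIJ);CHKERRQ(ierr);
     ierr = MatMPISBAIJSetPreallocationCSR(B,bs,i,j,v);CHKERRQ(ierr);
*/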
#undef __FUNCT__
#define __FUNCT__ "MatCreateMPIMatConcatenateSeqMat_MPISBAIJ"
PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPISBAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
{
  PetscErrorCode ierr;
  PetscInt       m,N,i,rstart,nnz,Ii,bs,cbs;
  PetscInt       *indx;
  PetscScalar    *values;

  PetscFunctionBegin;
  ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
  if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
    Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)inmat->data;
    PetscInt     *dnz,*onz,sum,mbs,Nbs;
    PetscInt     *bindx,rmax=a->rmax,j;

    ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
    mbs  = m/bs; Nbs = N/cbs;
    if (n == PETSC_DECIDE) {
      ierr = PetscSplitOwnership(comm,&n,&Nbs);CHKERRQ(ierr);
    }
    /* Check sum(n) = Nbs */
    ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
    if (sum != Nbs) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local block columns != global block columns %D",Nbs);

    ierr    = MPI_Scan(&mbs,&rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
    rstart -= mbs;

    ierr = PetscMalloc1(rmax,&bindx);CHKERRQ(ierr);
    ierr = MatPreallocateInitialize(comm,mbs,n,dnz,onz);CHKERRQ(ierr);
    ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);
    for (i=0; i<mbs; i++) {
      ierr = MatGetRow_SeqSBAIJ(inmat,i*bs,&nnz,&indx,NULL);CHKERRQ(ierr); /* non-blocked nnz and indx */
      nnz  = nnz/bs;
      for (j=0; j<nnz; j++) bindx[j] = indx[j*bs]/bs;
      ierr = MatPreallocateSet(i+rstart,nnz,bindx,dnz,onz);CHKERRQ(ierr);
      ierr = MatRestoreRow_SeqSBAIJ(inmat,i*bs,&nnz,&indx,NULL);CHKERRQ(ierr);
    }
    ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_FALSE);CHKERRQ(ierr);
    ierr = PetscFree(bindx);CHKERRQ(ierr);

    ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
    ierr = MatSetSizes(*outmat,m,n*bs,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
    ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
    ierr = MatSetType(*outmat,MATMPISBAIJ);CHKERRQ(ierr);
    ierr = MatMPISBAIJSetPreallocation(*outmat,bs,0,dnz,0,onz);CHKERRQ(ierr);
    ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
  }

  /* numeric phase */
  ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr);

  ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);
  for (i=0; i<m; i++) {
    ierr = MatGetRow_SeqSBAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
    Ii   = i + rstart;
    ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
    ierr = MatRestoreRow_SeqSBAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
  }
  ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_FALSE);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
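/*
   Illustrative sketch (not part of the PETSc source): the routine above is an
   implementation normally dispatched through MatCreateMPIMatConcatenateSeqMat(),
   which stacks one sequential matrix per rank into a parallel matrix.  seqA
   below is a hypothetical, already assembled MATSEQSBAIJ matrix on each rank.

     Mat seqA,mpiA;
     ierr = MatCreateMPIMatConcatenateSeqMat(PETSC_COMM_WORLD,seqA,PETSC_DECIDE,MAT_INITIAL_MATRIX,&mpiA);CHKERRQ(ierr);
*/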