
#include <../src/mat/impls/baij/mpi/mpibaij.h> /*I "petscmat.h" I*/
#include <../src/mat/impls/sbaij/mpi/mpisbaij.h>
#include <../src/mat/impls/sbaij/seq/sbaij.h>
#include <petscblaslapack.h>

#if defined(PETSC_HAVE_ELEMENTAL)
PETSC_EXTERN PetscErrorCode MatConvert_MPISBAIJ_Elemental(Mat,MatType,MatReuse,Mat*);
#endif
#undef __FUNCT__
#define __FUNCT__ "MatStoreValues_MPISBAIJ"
PetscErrorCode MatStoreValues_MPISBAIJ(Mat mat)
{
  Mat_MPISBAIJ   *aij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
  ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRetrieveValues_MPISBAIJ"
PetscErrorCode MatRetrieveValues_MPISBAIJ(Mat mat)
{
  Mat_MPISBAIJ   *aij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
  ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#define MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv,orow,ocol) \
  { \
    brow = row/bs;  \
    rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
    rmax = aimax[brow]; nrow = ailen[brow]; \
    bcol = col/bs; \
    ridx = row % bs; cidx = col % bs; \
    low  = 0; high = nrow; \
    while (high-low > 3) { \
      t = (low+high)/2; \
      if (rp[t] > bcol) high = t; \
      else              low  = t; \
    } \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap = ap + bs2*_i + bs*cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value; \
        else                    *bap  = value; \
        goto a_noinsert; \
      } \
    } \
    if (a->nonew == 1) goto a_noinsert; \
    if (a->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \
    N = nrow++ - 1; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp[ii+1] = rp[ii]; \
      ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
    } \
    if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); } \
    rp[_i]                      = bcol; \
    ap[bs2*_i + bs*cidx + ridx] = value; \
    A->nonzerostate++; \
a_noinsert:; \
    ailen[brow] = nrow; \
  }
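/*
   Added commentary (not from the original source): both MatSetValues_SeqSBAIJ_*_Private
   macros follow the same scheme.  Within block row brow the block-column indices rp[]
   are kept sorted, so the target block column bcol is located by a short bisection
   followed by a linear scan.  Each bs x bs block is stored column-major in ap[], hence
   the scalar at (ridx,cidx) lives at offset bs2*_i + bs*cidx + ridx.  If the block is
   not present and new nonzeros are allowed, all later blocks in the row are shifted up
   by one and a zeroed block is inserted in sorted position.  The A macro above handles
   the diagonal (symmetric, upper-triangle-only) part; the B macro below handles the
   off-diagonal part.
*/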

#define MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv,orow,ocol) \
  { \
    brow = row/bs;  \
    rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
    rmax = bimax[brow]; nrow = bilen[brow]; \
    bcol = col/bs; \
    ridx = row % bs; cidx = col % bs; \
    low  = 0; high = nrow; \
    while (high-low > 3) { \
      t = (low+high)/2; \
      if (rp[t] > bcol) high = t; \
      else              low  = t; \
    } \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap = ap + bs2*_i + bs*cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value; \
        else                    *bap  = value; \
        goto b_noinsert; \
      } \
    } \
    if (b->nonew == 1) goto b_noinsert; \
    if (b->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \
    N = nrow++ - 1; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp[ii+1] = rp[ii]; \
      ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
    } \
    if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); } \
    rp[_i]                      = bcol; \
    ap[bs2*_i + bs*cidx + ridx] = value; \
    B->nonzerostate++; \
b_noinsert:; \
    bilen[brow] = nrow; \
  }
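/*
   Usage sketch (added illustration, not part of the original source): since the
   MPISBAIJ format stores only the upper triangle by block, a caller assembling a
   symmetric matrix sets only entries whose block row is <= block column, e.g.

       ierr = MatSetValue(mat,i,j,v,ADD_VALUES);CHKERRQ(ierr);   with i/bs <= j/bs

   Entries below the block diagonal are either silently dropped or flagged as an
   error, depending on MAT_IGNORE_LOWER_TRIANGULAR; see the routine below.
*/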

/* Only add/insert a(i,j) with i<=j (blocks).
   Any a(i,j) with i>j supplied by the user is ignored.
*/
#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPISBAIJ"
PetscErrorCode MatSetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  MatScalar      value;
  PetscBool      roworiented = baij->roworiented;
  PetscErrorCode ierr;
  PetscInt       i,j,row,col;
  PetscInt       rstart_orig=mat->rmap->rstart;
  PetscInt       rend_orig  =mat->rmap->rend,cstart_orig=mat->cmap->rstart;
  PetscInt       cend_orig  =mat->cmap->rend,bs=mat->rmap->bs;

  /* Some Variables required in the macro */
  Mat          A     = baij->A;
  Mat_SeqSBAIJ *a    = (Mat_SeqSBAIJ*)(A)->data;
  PetscInt     *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
  MatScalar    *aa   =a->a;

  Mat         B     = baij->B;
  Mat_SeqBAIJ *b    = (Mat_SeqBAIJ*)(B)->data;
  PetscInt    *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
  MatScalar   *ba   =b->a;

  PetscInt  *rp,ii,nrow,_i,rmax,N,brow,bcol;
  PetscInt  low,high,t,ridx,cidx,bs2=a->bs2;
  MatScalar *ap,*bap;

  /* for stash */
  PetscInt  n_loc, *in_loc = NULL;
  MatScalar *v_loc = NULL;

  PetscFunctionBegin;
  if (!baij->donotstash) {
    if (n > baij->n_loc) {
      ierr = PetscFree(baij->in_loc);CHKERRQ(ierr);
      ierr = PetscFree(baij->v_loc);CHKERRQ(ierr);
      ierr = PetscMalloc1(n,&baij->in_loc);CHKERRQ(ierr);
      ierr = PetscMalloc1(n,&baij->v_loc);CHKERRQ(ierr);

      baij->n_loc = n;
    }
    in_loc = baij->in_loc;
    v_loc  = baij->v_loc;
  }

  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
    if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */
      row = im[i] - rstart_orig;                     /* local row index */
      for (j=0; j<n; j++) {
        if (im[i]/bs > in[j]/bs) {
          if (a->ignore_ltriangular) {
            continue; /* ignore lower triangular blocks */
          } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. To ignore these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
        }
        if (in[j] >= cstart_orig && in[j] < cend_orig) { /* diag entry (A) */
          col  = in[j] - cstart_orig;                    /* local col index */
          brow = row/bs; bcol = col/bs;
          if (brow > bcol) continue; /* ignore lower triangular blocks of A */
          if (roworiented) value = v[i*n+j];
          else             value = v[i+j*m];
          MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv,im[i],in[j]);
          /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
#endif
        else { /* off-diag entry (B) */
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined(PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
            col  = col - 1;
#else
            col = baij->colmap[in[j]/bs] - 1;
#endif
            if (col < 0 && !((Mat_SeqSBAIJ*)(baij->A->data))->nonew) {
              ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqSBAIJ_B_Private() */
              B    = baij->B;
              b    = (Mat_SeqBAIJ*)(B)->data;
              bimax= b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
              ba   = b->a;
            } else col += in[j]%bs;
          } else col = in[j];
          if (roworiented) value = v[i*n+j];
          else             value = v[i+j*m];
          MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv,im[i],in[j]);
          /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        }
      }
    } else { /* off processor entry */
      if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!baij->donotstash) {
        mat->assembled = PETSC_FALSE;
        n_loc          = 0;
        for (j=0; j<n; j++) {
          if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */
          in_loc[n_loc] = in[j];
          if (roworiented) {
            v_loc[n_loc] = v[i*n+j];
          } else {
            v_loc[n_loc] = v[j*m+i];
          }
          n_loc++;
        }
        ierr = MatStashValuesRow_Private(&mat->stash,im[i],n_loc,in_loc,v_loc,PETSC_FALSE);CHKERRQ(ierr);
      }
    }
  }
  PetscFunctionReturn(0);
}
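/*
   Added commentary (not from the original source): the inlined blocked-insert
   routines below receive one bs x bs block of values in v.  Row-oriented input is
   row-major, while the internal block storage is column-major; for bs = 2 the
   row-oriented input v = {a00,a01,a10,a11} is scattered as

       bap[0] = a00, bap[2] = a01, bap[1] = a10, bap[3] = a11

   which is what the stride-bs inner loops (jj += bs) accomplish.  Column-oriented
   input already matches the storage and is copied contiguously.
*/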

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesBlocked_SeqSBAIJ_Inlined"
PETSC_STATIC_INLINE PetscErrorCode MatSetValuesBlocked_SeqSBAIJ_Inlined(Mat A,PetscInt row,PetscInt col,const PetscScalar v[],InsertMode is,PetscInt orow,PetscInt ocol)
{
  Mat_SeqSBAIJ      *a = (Mat_SeqSBAIJ*)A->data;
  PetscErrorCode    ierr;
  PetscInt          *rp,low,high,t,ii,jj,nrow,i,rmax,N;
  PetscInt          *imax=a->imax,*ai=a->i,*ailen=a->ilen;
  PetscInt          *aj  =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs;
  PetscBool         roworiented=a->roworiented;
  const PetscScalar *value = v;
  MatScalar         *ap,*aa = a->a,*bap;

  PetscFunctionBegin;
  if (col < row) {
    if (a->ignore_ltriangular) PetscFunctionReturn(0); /* ignore lower triangular block */
    else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. To ignore these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
  }
  rp    = aj + ai[row];
  ap    = aa + bs2*ai[row];
  rmax  = imax[row];
  nrow  = ailen[row];
  value = v;
  low   = 0;
  high  = nrow;

  while (high-low > 7) {
    t = (low+high)/2;
    if (rp[t] > col) high = t;
    else             low  = t;
  }
  for (i=low; i<high; i++) {
    if (rp[i] > col) break;
    if (rp[i] == col) {
      bap = ap + bs2*i;
      if (roworiented) {
        if (is == ADD_VALUES) {
          for (ii=0; ii<bs; ii++) {
            for (jj=ii; jj<bs2; jj+=bs) {
              bap[jj] += *value++;
            }
          }
        } else {
          for (ii=0; ii<bs; ii++) {
            for (jj=ii; jj<bs2; jj+=bs) {
              bap[jj] = *value++;
            }
          }
        }
      } else {
        if (is == ADD_VALUES) {
          for (ii=0; ii<bs; ii++) {
            for (jj=0; jj<bs; jj++) {
              *bap++ += *value++;
            }
          }
        } else {
          for (ii=0; ii<bs; ii++) {
            for (jj=0; jj<bs; jj++) {
              *bap++ = *value++;
            }
          }
        }
      }
      goto noinsert2;
    }
  }
  if (nonew == 1) goto noinsert2;
  if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero block at block indices (%D, %D) in the matrix", orow, ocol);
  MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
  N = nrow++ - 1; high++;
  /* shift up all the later entries in this row */
  for (ii=N; ii>=i; ii--) {
    rp[ii+1] = rp[ii];
    ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
  }
  if (N >= i) {
    ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
  }
  rp[i] = col;
  bap   = ap + bs2*i;
  if (roworiented) {
    for (ii=0; ii<bs; ii++) {
      for (jj=ii; jj<bs2; jj+=bs) {
        bap[jj] = *value++;
      }
    }
  } else {
    for (ii=0; ii<bs; ii++) {
      for (jj=0; jj<bs; jj++) {
        *bap++ = *value++;
      }
    }
  }
noinsert2:;
  ailen[row] = nrow;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ_Inlined"
/*
    This routine is exactly duplicated in mpibaij.c
*/
PETSC_STATIC_INLINE PetscErrorCode MatSetValuesBlocked_SeqBAIJ_Inlined(Mat A,PetscInt row,PetscInt col,const PetscScalar v[],InsertMode is,PetscInt orow,PetscInt ocol)
{
  Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
  PetscInt          *rp,low,high,t,ii,jj,nrow,i,rmax,N;
  PetscInt          *imax=a->imax,*ai=a->i,*ailen=a->ilen;
  PetscErrorCode    ierr;
  PetscInt          *aj =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs;
  PetscBool         roworiented=a->roworiented;
  const PetscScalar *value = v;
  MatScalar         *ap,*aa = a->a,*bap;

  PetscFunctionBegin;
  rp    = aj + ai[row];
  ap    = aa + bs2*ai[row];
  rmax  = imax[row];
  nrow  = ailen[row];
  low   = 0;
  high  = nrow;
  value = v;
  while (high-low > 7) {
    t = (low+high)/2;
    if (rp[t] > col) high = t;
    else             low  = t;
  }
  for (i=low; i<high; i++) {
    if (rp[i] > col) break;
    if (rp[i] == col) {
      bap = ap + bs2*i;
      if (roworiented) {
        if (is == ADD_VALUES) {
          for (ii=0; ii<bs; ii++) {
            for (jj=ii; jj<bs2; jj+=bs) {
              bap[jj] += *value++;
            }
          }
        } else {
          for (ii=0; ii<bs; ii++) {
            for (jj=ii; jj<bs2; jj+=bs) {
              bap[jj] = *value++;
            }
          }
        }
      } else {
        if (is == ADD_VALUES) {
          for (ii=0; ii<bs; ii++,value+=bs) {
            for (jj=0; jj<bs; jj++) {
              bap[jj] += value[jj];
            }
            bap += bs;
          }
        } else {
          for (ii=0; ii<bs; ii++,value+=bs) {
            for (jj=0; jj<bs; jj++) {
              bap[jj] = value[jj];
            }
            bap += bs;
          }
        }
      }
      goto noinsert2;
    }
  }
  if (nonew == 1) goto noinsert2;
  if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero block at global block indices (%D, %D) in the matrix", orow, ocol);
  MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
  N = nrow++ - 1; high++;
  /* shift up all the later entries in this row */
  for (ii=N; ii>=i; ii--) {
    rp[ii+1] = rp[ii];
    ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
  }
  if (N >= i) {
    ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
  }
  rp[i] = col;
  bap   = ap + bs2*i;
  if (roworiented) {
    for (ii=0; ii<bs; ii++) {
      for (jj=ii; jj<bs2; jj+=bs) {
        bap[jj] = *value++;
      }
    }
  } else {
    for (ii=0; ii<bs; ii++) {
      for (jj=0; jj<bs; jj++) {
        *bap++ = *value++;
      }
    }
  }
noinsert2:;
  ailen[row] = nrow;
  PetscFunctionReturn(0);
}
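/*
   Added commentary (not from the original source): MatSetValuesBlocked_MPISBAIJ
   stages each bs x bs block in the contiguous buffer barray before handing it to
   the inlined routines above.  stepval is the remaining stride (in scalars)
   between rows of one block inside the caller's array v: for row-oriented input,
   consecutive rows of a block are separated by the full row of n blocks, i.e.
   n*bs scalars, so after copying bs scalars the pointer advances by (n-1)*bs.
*/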

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ"
/*
    This routine could be optimized by removing the need for the block copy below and passing stride information
    to the above inline routines; similarly in MatSetValuesBlocked_MPIBAIJ()
*/
PetscErrorCode MatSetValuesBlocked_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
{
  Mat_MPISBAIJ    *baij = (Mat_MPISBAIJ*)mat->data;
  const MatScalar *value;
  MatScalar       *barray     = baij->barray;
  PetscBool       roworiented = baij->roworiented,ignore_ltriangular = ((Mat_SeqSBAIJ*)baij->A->data)->ignore_ltriangular;
  PetscErrorCode  ierr;
  PetscInt        i,j,ii,jj,row,col,rstart=baij->rstartbs;
  PetscInt        rend=baij->rendbs,cstart=baij->rstartbs,stepval;
  PetscInt        cend=baij->rendbs,bs=mat->rmap->bs,bs2=baij->bs2;

  PetscFunctionBegin;
  if (!barray) {
    ierr         = PetscMalloc1(bs2,&barray);CHKERRQ(ierr);
    baij->barray = barray;
  }

  if (roworiented) {
    stepval = (n-1)*bs;
  } else {
    stepval = (m-1)*bs;
  }
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block indexed row too large %D max %D",im[i],baij->Mbs-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row = im[i] - rstart;
      for (j=0; j<n; j++) {
        if (im[i] > in[j]) {
          if (ignore_ltriangular) continue; /* ignore lower triangular blocks */
          else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. To ignore these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
        }
        /* If NumCol = 1 then a copy is not required */
        if ((roworiented) && (n == 1)) {
          barray = (MatScalar*) v + i*bs2;
        } else if ((!roworiented) && (m == 1)) {
          barray = (MatScalar*) v + j*bs2;
        } else { /* Here a copy is required */
          if (roworiented) {
            value = v + i*(stepval+bs)*bs + j*bs;
          } else {
            value = v + j*(stepval+bs)*bs + i*bs;
          }
          for (ii=0; ii<bs; ii++,value+=stepval) {
            for (jj=0; jj<bs; jj++) {
              *barray++ = *value++;
            }
          }
          barray -= bs2;
        }

        if (in[j] >= cstart && in[j] < cend) {
          col  = in[j] - cstart;
          ierr = MatSetValuesBlocked_SeqSBAIJ_Inlined(baij->A,row,col,barray,addv,im[i],in[j]);CHKERRQ(ierr);
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block indexed column too large %D max %D",in[j],baij->Nbs-1);
#endif
        else {
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }

#if defined(PETSC_USE_DEBUG)
#if defined(PETSC_USE_CTABLE)
            { PetscInt data;
              ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
              if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
            }
#else
            if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
#endif
#endif
#if defined(PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col  = (col - 1)/bs;
#else
            col = (baij->colmap[in[j]] - 1)/bs;
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
              ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
            }
          } else col = in[j];
          ierr = MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B,row,col,barray,addv,im[i],in[j]);CHKERRQ(ierr);
        }
      }
    } else {
      if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process block indexed row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}
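/*
   Added commentary (not from the original source): MatGetValues_MPISBAIJ only
   supports rows owned by this process.  Columns in the diagonal block are read
   from baij->A directly; other columns are translated through colmap/garray to
   the compacted column space of baij->B, and entries that fall outside the
   stored nonzero pattern are returned as 0.0.
*/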

#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPISBAIJ"
PetscErrorCode MatGetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       bs       = mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend;
  PetscInt       bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); */
    if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
    if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
      row = idxm[i] - bsrstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column %D",idxn[j]); */
        if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
        if (idxn[j] >= bscstart && idxn[j] < bscend) {
          col  = idxn[j] - bscstart;
          ierr = MatGetValues_SeqSBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!baij->colmap) {
            ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined(PETSC_USE_CTABLE)
          ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
          data--;
#else
          data = baij->colmap[idxn[j]/bs]-1;
#endif
          if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
          else {
            col  = data + idxn[j]%bs;
            ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
  }
  PetscFunctionReturn(0);
}
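/*
   Added commentary (not from the original source): in the Frobenius branch of
   MatNorm below, each rank squares the norms of its A (diagonal-block) and B
   (off-diagonal) parts.  Every stored B block represents two entries of the full
   symmetric matrix (itself and its transpose), so after the reduction

       ||M||_F = sqrt(sum[0] + 2*sum[1])

   For NORM_1/NORM_INFINITY (equal for a symmetric matrix), each stored block
   contributes to both its column sums and, when it is off the block diagonal,
   its row sums before the reduction.
*/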

#undef __FUNCT__
#define __FUNCT__ "MatNorm_MPISBAIJ"
PetscErrorCode MatNorm_MPISBAIJ(Mat mat,NormType type,PetscReal *norm)
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscReal      sum[2],*lnorm2;

  PetscFunctionBegin;
  if (baij->size == 1) {
    ierr = MatNorm(baij->A,type,norm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      ierr    = PetscMalloc1(2,&lnorm2);CHKERRQ(ierr);
      ierr    = MatNorm(baij->A,type,lnorm2);CHKERRQ(ierr);
      *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2++;            /* square of norm(A) */
      ierr    = MatNorm(baij->B,type,lnorm2);CHKERRQ(ierr);
      *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2--;            /* square of norm(B) */
      ierr    = MPI_Allreduce(lnorm2,sum,2,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      *norm   = PetscSqrtReal(sum[0] + 2*sum[1]);
      ierr    = PetscFree(lnorm2);CHKERRQ(ierr);
    } else if (type == NORM_INFINITY || type == NORM_1) { /* max row/column sum */
      Mat_SeqSBAIJ *amat=(Mat_SeqSBAIJ*)baij->A->data;
      Mat_SeqBAIJ  *bmat=(Mat_SeqBAIJ*)baij->B->data;
      PetscReal    *rsum,*rsum2,vabs;
      PetscInt     *jj,*garray=baij->garray,rstart=baij->rstartbs,nz;
      PetscInt     brow,bcol,col,bs=baij->A->rmap->bs,row,grow,gcol,mbs=amat->mbs;
      MatScalar    *v;

      ierr = PetscMalloc2(mat->cmap->N,&rsum,mat->cmap->N,&rsum2);CHKERRQ(ierr);
      ierr = PetscMemzero(rsum,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
      /* Amat */
      v = amat->a; jj = amat->j;
      for (brow=0; brow<mbs; brow++) {
        grow = bs*(rstart + brow);
        nz   = amat->i[brow+1] - amat->i[brow];
        for (bcol=0; bcol<nz; bcol++) {
          gcol = bs*(rstart + *jj); jj++;
          for (col=0; col<bs; col++) {
            for (row=0; row<bs; row++) {
              vabs            = PetscAbsScalar(*v); v++;
              rsum[gcol+col] += vabs;
              /* non-diagonal block */
              if (bcol > 0 && vabs > 0.0) rsum[grow+row] += vabs;
            }
          }
        }
      }
      /* Bmat */
      v = bmat->a; jj = bmat->j;
      for (brow=0; brow<mbs; brow++) {
        grow = bs*(rstart + brow);
        nz   = bmat->i[brow+1] - bmat->i[brow];
        for (bcol=0; bcol<nz; bcol++) {
          gcol = bs*garray[*jj]; jj++;
          for (col=0; col<bs; col++) {
            for (row=0; row<bs; row++) {
              vabs            = PetscAbsScalar(*v); v++;
              rsum[gcol+col] += vabs;
              rsum[grow+row] += vabs;
            }
          }
        }
      }
      ierr  = MPI_Allreduce(rsum,rsum2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      *norm = 0.0;
      for (col=0; col<mat->cmap->N; col++) {
        if (rsum2[col] > *norm) *norm = rsum2[col];
      }
      ierr = PetscFree2(rsum,rsum2);CHKERRQ(ierr);
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for this norm yet");
  }
  PetscFunctionReturn(0);
}
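/*
   Added commentary (not from the original source): MatAssemblyBegin starts the
   communication that ships stashed off-process entries to their owners.  The
   MPI_Allreduce with MPI_BOR detects an illegal mix of modes across ranks:
   ADD_VALUES|INSERT_VALUES can only be produced when some ranks inserted while
   others added.
*/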

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPISBAIJ"
PetscErrorCode MatAssemblyBegin_MPISBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;
  InsertMode     addv;

  PetscFunctionBegin;
  if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);

  /* make sure all processors are either in INSERTMODE or ADDMODE */
  ierr = MPI_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
  ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(mat,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
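/*
   Added commentary (not from the original source): MatAssemblyEnd drains both
   stashes.  Received entries are grouped by row so each row is applied with a
   single MatSetValues_MPISBAIJ()/MatSetValuesBlocked_MPISBAIJ() call; the
   blocked stash always arrives column-oriented, which is why the roworiented
   flags are temporarily forced to PETSC_FALSE below.
*/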

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPISBAIJ"
PetscErrorCode MatAssemblyEnd_MPISBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPISBAIJ   *baij=(Mat_MPISBAIJ*)mat->data;
  Mat_SeqSBAIJ   *a   =(Mat_SeqSBAIJ*)baij->A->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
  PetscInt       *row,*col;
  PetscBool      other_disassembled;
  PetscMPIInt    n;
  PetscBool      r1,r2,r3;
  MatScalar      *val;
  InsertMode     addv = mat->insertmode;

  /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
  PetscFunctionBegin;
  if (!baij->donotstash && !mat->nooffprocentries) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i    = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
    /* Now process the block-stash. Since the values are stashed column-oriented,
       temporarily set the roworiented flags to PETSC_FALSE, and after the
       MatSetValuesBlocked() calls restore the original flags */
    r1 = baij->roworiented;
    r2 = a->roworiented;
    r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented;

    baij->roworiented = PETSC_FALSE;
    a->roworiented    = PETSC_FALSE;

    ((Mat_SeqBAIJ*)baij->B->data)->roworiented = PETSC_FALSE; /* b->roworiented */
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        ierr = MatSetValuesBlocked_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
        i    = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);

    baij->roworiented = r1;
    a->roworiented    = r2;

    ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworiented */
  }

  ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
    }
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPISBAIJ(mat);CHKERRQ(ierr); /* setup Mvctx and sMvctx */
  }
  ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);

  ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);

  baij->rowvalues = 0;

  /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
  if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
    PetscObjectState state = baij->A->nonzerostate + baij->B->nonzerostate;
    ierr = MPI_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

extern PetscErrorCode MatView_SeqSBAIJ_ASCII(Mat,PetscViewer);
extern PetscErrorCode MatSetValues_MPIBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
#include <petscdraw.h>
#undef __FUNCT__
#define __FUNCT__ "MatView_MPISBAIJ_ASCIIorDraworSocket"
static PetscErrorCode MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPISBAIJ      *baij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode    ierr;
  PetscInt          bs   = mat->rmap->bs;
  PetscMPIInt       rank = baij->rank;
  PetscBool         iascii,isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo info;
      ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,mat->rmap->bs,(PetscInt)info.memory);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
      ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(0);
    }
  }

  if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
    if (isnull) PetscFunctionReturn(0);
  }

  {
    /* assemble the entire matrix onto first processor. */
    Mat          A;
    Mat_SeqSBAIJ *Aloc;
    Mat_SeqBAIJ  *Bloc;
    PetscInt     M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
    MatScalar    *a;
    const char   *matname;

    /* Should this be the same type as mat? */
    ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
    } else {
      ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
    }
    ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
    ierr = MatMPISBAIJSetPreallocation(A,mat->rmap->bs,0,NULL,0,NULL);CHKERRQ(ierr);
    ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);

    /* copy over the A part */
    Aloc = (Mat_SeqSBAIJ*)baij->A->data;
    ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
    ierr = PetscMalloc1(bs,&rvals);CHKERRQ(ierr);

    for (i=0; i<mbs; i++) {
      rvals[0] = bs*(baij->rstartbs + i);
      for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
      for (j=ai[i]; j<ai[i+1]; j++) {
        col = (baij->cstartbs+aj[j])*bs;
        for (k=0; k<bs; k++) {
          ierr = MatSetValues_MPISBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++;
          a += bs;
        }
      }
    }
    /* copy over the B part */
    Bloc = (Mat_SeqBAIJ*)baij->B->data;
    ai   = Bloc->i; aj = Bloc->j; a = Bloc->a;
    for (i=0; i<mbs; i++) {
      rvals[0] = bs*(baij->rstartbs + i);
      for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
      for (j=ai[i]; j<ai[i+1]; j++) {
        col = baij->garray[aj[j]]*bs;
        for (k=0; k<bs; k++) {
          ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++;
          a += bs;
        }
      }
    }
    ierr = PetscFree(rvals);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
    ierr = PetscObjectGetName((PetscObject)mat,&matname);CHKERRQ(ierr);
    if (!rank) {
      ierr = PetscObjectSetName((PetscObject)((Mat_MPISBAIJ*)(A->data))->A,matname);CHKERRQ(ierr);
      ierr = MatView_SeqSBAIJ_ASCII(((Mat_MPISBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
    ierr = MatDestroy(&A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPISBAIJ"
PetscErrorCode MatView_MPISBAIJ(Mat mat,PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscBool      iascii,isdraw,issocket,isbinary;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
  if (iascii || isdraw || issocket || isbinary) {
    ierr = MatView_MPISBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPISBAIJ"
PetscErrorCode MatDestroy_MPISBAIJ(Mat mat)
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
  ierr = MatDestroy(&baij->A);CHKERRQ(ierr);
  ierr = MatDestroy(&baij->B);CHKERRQ(ierr);
#if defined(PETSC_USE_CTABLE)
  ierr = PetscTableDestroy(&baij->colmap);CHKERRQ(ierr);
#else
  ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
#endif
  ierr = PetscFree(baij->garray);CHKERRQ(ierr);
  ierr = VecDestroy(&baij->lvec);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&baij->Mvctx);CHKERRQ(ierr);
  ierr = VecDestroy(&baij->slvec0);CHKERRQ(ierr);
  ierr = VecDestroy(&baij->slvec0b);CHKERRQ(ierr);
  ierr = VecDestroy(&baij->slvec1);CHKERRQ(ierr);
  ierr = VecDestroy(&baij->slvec1a);CHKERRQ(ierr);
  ierr = VecDestroy(&baij->slvec1b);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&baij->sMvctx);CHKERRQ(ierr);
  ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
  ierr = PetscFree(baij->barray);CHKERRQ(ierr);
  ierr = PetscFree(baij->hd);CHKERRQ(ierr);
  ierr = VecDestroy(&baij->diag);CHKERRQ(ierr);
  ierr = VecDestroy(&baij->bb1);CHKERRQ(ierr);
  ierr = VecDestroy(&baij->xx1);CHKERRQ(ierr);
#if defined(PETSC_USE_REAL_MAT_SINGLE)
  ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr);
#endif
  ierr = PetscFree(baij->in_loc);CHKERRQ(ierr);
  ierr = PetscFree(baij->v_loc);CHKERRQ(ierr);
  ierr = PetscFree(baij->rangebs);CHKERRQ(ierr);
  ierr = PetscFree(mat->data);CHKERRQ(ierr);

  ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPISBAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpisbaij_mpisbstrm_C",NULL);CHKERRQ(ierr);
#if defined(PETSC_HAVE_ELEMENTAL)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpisbaij_elemental_C",NULL);CHKERRQ(ierr);
#endif
  PetscFunctionReturn(0);
}
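/*
   Added commentary (not from the original source): the MatMult routines below use
   a single scatter per product.  With y = (A + B + B^T) x in exact arithmetic
   (A = local diagonal block, B = local off-diagonal part), each rank

     1. computes A*x into slvec1a and B^T*x into slvec0b,
     2. packs [x ; B^T*x] into slvec0 and scatter-adds it into
        slvec1 = [slvec1a ; slvec1b], which both accumulates the remote B^T*x
        contributions into slvec1a and gathers the ghost values of x into slvec1b,
     3. finishes with y = B*slvec1b + slvec1a.
*/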

#undef __FUNCT__
#define __FUNCT__ "MatMult_MPISBAIJ_Hermitian"
PetscErrorCode MatMult_MPISBAIJ_Hermitian(Mat A,Vec xx,Vec yy)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt,mbs=a->mbs,bs=A->rmap->bs;
  PetscScalar    *x,*from;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");

  /* diagonal part */
  ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr);
  ierr = VecSet(a->slvec1b,0.0);CHKERRQ(ierr);

  /* subdiagonal part */
  ierr = (*a->B->ops->multhermitiantranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);

  /* copy x into the vec slvec0 */
  ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
  ierr = VecGetArray(xx,&x);CHKERRQ(ierr);

  ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
  ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
  ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);

  ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  /* superdiagonal part */
  ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMult_MPISBAIJ"
PetscErrorCode MatMult_MPISBAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPISBAIJ      *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode    ierr;
  PetscInt          nt,mbs=a->mbs,bs=A->rmap->bs;
  PetscScalar       *from;
  const PetscScalar *x;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");

  /* diagonal part */
  ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr);
  ierr = VecSet(a->slvec1b,0.0);CHKERRQ(ierr);

  /* subdiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);

  /* copy x into the vec slvec0 */
  ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
  ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);

  ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
  ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
  ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);

  ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  /* superdiagonal part */
  ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
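/*
   Added commentary (not from the original source): the _2comm variant below is
   the older scheme with two communications per product: a forward scatter of x
   for the superdiagonal part B, then a reverse scatter-add of B^T*x for the
   subdiagonal part.
*/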

#undef __FUNCT__
#define __FUNCT__ "MatMult_MPISBAIJ_2comm"
PetscErrorCode MatMult_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");

  ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
  if (nt != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and yy");

  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  /* do diagonal part */
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  /* do superdiagonal part */
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  /* do subdiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
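/*
   Added commentary (not from the original source): MatMultAdd below computes
   zz = yy + A*xx with the same single-scatter scheme as MatMult_MPISBAIJ;
   yy is folded in through the diagonal-block multadd into slvec1a.
*/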

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPISBAIJ"
PetscErrorCode MatMultAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPISBAIJ      *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode    ierr;
  PetscInt          mbs=a->mbs,bs=A->rmap->bs;
  PetscScalar       *from,zero=0.0;
  const PetscScalar *x;

  PetscFunctionBegin;
  /*
  PetscSynchronizedPrintf(PetscObjectComm((PetscObject)A)," MatMultAdd is called ...\n");
  PetscSynchronizedFlush(PetscObjectComm((PetscObject)A),PETSC_STDOUT);
  */
  /* diagonal part */
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,a->slvec1a);CHKERRQ(ierr);
  ierr = VecSet(a->slvec1b,zero);CHKERRQ(ierr);

  /* subdiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);

  /* copy x into the vec slvec0 */
  ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
  ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
  ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
  ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);

  ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

  /* superdiagonal part */
  ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPISBAIJ_2comm"
PetscErrorCode MatMultAdd_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  /* do diagonal part */
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* do superdiagonal part */
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);

  /* do subdiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonal_MPISBAIJ"
PetscErrorCode MatGetDiagonal_MPISBAIJ(Mat A,Vec v)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* if (a->rmap->N != a->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatScale_MPISBAIJ"
PetscErrorCode MatScale_MPISBAIJ(Mat A,PetscScalar aa)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatScale(a->A,aa);CHKERRQ(ierr);
  ierr = MatScale(a->B,aa);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
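/*
   Added commentary (not from the original source): MatGetRow_MPISBAIJ merges one
   local row of A (diagonal block, local column indices) and B (off-diagonal part,
   compacted column indices) into a single row with global column indices in
   increasing order; garray maps B's block columns back to global block columns.
*/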

#undef __FUNCT__
#define __FUNCT__ "MatGetRow_MPISBAIJ"
PetscErrorCode MatGetRow_MPISBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
  PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
  PetscErrorCode ierr;
  PetscInt       bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
  PetscInt       nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend;
  PetscInt       *cmap,*idx_p,cstart = mat->rstartbs;

  PetscFunctionBegin;
  if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
       allocate enough space to hold information from the longest row.
    */
    Mat_SeqSBAIJ *Aa = (Mat_SeqSBAIJ*)mat->A->data;
    Mat_SeqBAIJ  *Ba = (Mat_SeqBAIJ*)mat->B->data;
    PetscInt     max = 1,mbs = mat->mbs,tmp;
    for (i=0; i<mbs; i++) {
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; /* row length */
      if (max < tmp) max = tmp;
    }
    ierr = PetscMalloc2(max*bs2,&mat->rowvalues,max*bs2,&mat->rowindices);CHKERRQ(ierr);
  }

  if (row < brstart || row >= brend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows");
  lrow = row - brstart; /* local row index */

  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  nztot = nzA + nzB;

  cmap = mat->garray;
  if (v || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      PetscInt imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        for (i=0; i<nzB; i++) {
          if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
        for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          for (i=0; i<imark; i++) {
            idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
          }
        } else {
          for (i=0; i<nzB; i++) {
            if (cmap[cworkB[i]/bs] < cstart) idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
            else break;
          }
          imark = i;
        }
        for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
        for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
      }
    } else {
      if (idx) *idx = 0;
      if (v)   *v   = 0;
    }
  }
  *nz  = nztot;
  ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRestoreRow_MPISBAIJ"
PetscErrorCode MatRestoreRow_MPISBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;

  PetscFunctionBegin;
  if (!baij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
  baij->getrowactive = PETSC_FALSE;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetRowUpperTriangular_MPISBAIJ"
PetscErrorCode MatGetRowUpperTriangular_MPISBAIJ(Mat A)
{
  Mat_MPISBAIJ *a  = (Mat_MPISBAIJ*)A->data;
  Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data;

  PetscFunctionBegin;
  aA->getrow_utriangular = PETSC_TRUE;
  PetscFunctionReturn(0);
}
#undef __FUNCT__
#define __FUNCT__ "MatRestoreRowUpperTriangular_MPISBAIJ"
PetscErrorCode MatRestoreRowUpperTriangular_MPISBAIJ(Mat A)
{
  Mat_MPISBAIJ *a  = (Mat_MPISBAIJ*)A->data;
  Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data;

  PetscFunctionBegin;
  aA->getrow_utriangular = PETSC_FALSE;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRealPart_MPISBAIJ"
PetscErrorCode MatRealPart_MPISBAIJ(Mat A)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatRealPart(a->A);CHKERRQ(ierr);
  ierr = MatRealPart(a->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatImaginaryPart_MPISBAIJ"
PetscErrorCode MatImaginaryPart_MPISBAIJ(Mat A)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
  ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Check if isrow is a subset of iscol_local, called by MatGetSubMatrix_MPISBAIJ()
   Input: isrow       - distributed (parallel),
          iscol_local - locally owned (seq)
*/
#undef __FUNCT__
#define __FUNCT__ "ISEqual_private"
PetscErrorCode ISEqual_private(IS isrow,IS iscol_local,PetscBool *flg)
{
  PetscErrorCode ierr;
  PetscInt       sz1,sz2,*a1,*a2,i,j,k,nmatch;
  const PetscInt *ptr1,*ptr2;

  PetscFunctionBegin;
  ierr = ISGetLocalSize(isrow,&sz1);CHKERRQ(ierr);
  ierr = ISGetLocalSize(iscol_local,&sz2);CHKERRQ(ierr);
  if (sz1 > sz2) {
    *flg = PETSC_FALSE;
    PetscFunctionReturn(0);
  }

  ierr = ISGetIndices(isrow,&ptr1);CHKERRQ(ierr);
  ierr = ISGetIndices(iscol_local,&ptr2);CHKERRQ(ierr);

  ierr = PetscMalloc1(sz1,&a1);CHKERRQ(ierr);
  ierr = PetscMalloc1(sz2,&a2);CHKERRQ(ierr);
  ierr = PetscMemcpy(a1,ptr1,sz1*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMemcpy(a2,ptr2,sz2*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscSortInt(sz1,a1);CHKERRQ(ierr);
  ierr = PetscSortInt(sz2,a2);CHKERRQ(ierr);

  nmatch = 0;
  k      = 0;
  for (i=0; i<sz1; i++){
    for (j=k; j<sz2; j++){
      if (a1[i] == a2[j]) {
        k = j; nmatch++;
        break;
      }
    }
  }
  ierr = ISRestoreIndices(isrow,&ptr1);CHKERRQ(ierr);
  ierr = ISRestoreIndices(iscol_local,&ptr2);CHKERRQ(ierr);
  ierr = PetscFree(a1);CHKERRQ(ierr);
  ierr = PetscFree(a2);CHKERRQ(ierr);
  if (nmatch < sz1) {
    *flg = PETSC_FALSE;
  } else {
    *flg = PETSC_TRUE;
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetSubMatrix_MPISBAIJ"
PetscErrorCode MatGetSubMatrix_MPISBAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
{
  PetscErrorCode ierr;
  IS             iscol_local;
  PetscInt       csize;
  PetscBool      isequal;

  PetscFunctionBegin;
  ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
  if (call == MAT_REUSE_MATRIX) {
    ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
    if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
  } else {
    ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
    ierr = ISEqual_private(isrow,iscol_local,&isequal);CHKERRQ(ierr);
    if (!isequal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"For symmetric format, iscol must equal isrow");
  }

  /* now call MatGetSubMatrix_MPIBAIJ() */
  ierr = MatGetSubMatrix_MPIBAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
  if (call == MAT_INITIAL_MATRIX) {
    ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
    ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroEntries_MPISBAIJ"
PetscErrorCode MatZeroEntries_MPISBAIJ(Mat A)
{
  Mat_MPISBAIJ   *l = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetInfo_MPISBAIJ"
PetscErrorCode MatGetInfo_MPISBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)matin->data;
  Mat            A  = a->A,B = a->B;
  PetscErrorCode ierr;
  PetscReal      isend[5],irecv[5];

  PetscFunctionBegin;
  info->block_size = (PetscReal)matin->rmap->bs;

  ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);

  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;

  ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);

  isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
  isend[3] += info->memory;  isend[4] += info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetOption_MPISBAIJ"
PetscErrorCode MatSetOption_MPISBAIJ(Mat A,MatOption op,PetscBool flg)
{
  Mat_MPISBAIJ   *a  = (Mat_MPISBAIJ*)A->data;
  Mat_SeqSBAIJ   *aA = (Mat_SeqSBAIJ*)a->A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
  case MAT_UNUSED_NONZERO_LOCATION_ERR:
  case MAT_KEEP_NONZERO_PATTERN:
  case MAT_NEW_NONZERO_LOCATION_ERR:
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
    break;
  case MAT_ROW_ORIENTED:
    a->roworiented = flg;

    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
    break;
  case MAT_NEW_DIAGONALS:
    ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = flg;
    break;
  case MAT_USE_HASH_TABLE:
    a->ht_flag = flg;
    break;
  case MAT_HERMITIAN:
    if (!A->assembled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call MatAssemblyEnd() first");
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);

    A->ops->mult = MatMult_MPISBAIJ_Hermitian;
    break;
  case MAT_SPD:
    A->spd_set = PETSC_TRUE;
    A->spd     = flg;
    if (flg) {
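/* (continuing MAT_SPD handling; added commentary for the routines further below
   appears after MatSetUp_MPISBAIJ) */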
      A->symmetric                  = PETSC_TRUE;
      A->structurally_symmetric     = PETSC_TRUE;
      A->symmetric_set              = PETSC_TRUE;
      A->structurally_symmetric_set = PETSC_TRUE;
    }
    break;
  case MAT_SYMMETRIC:
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  case MAT_STRUCTURALLY_SYMMETRIC:
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  case MAT_SYMMETRY_ETERNAL:
    if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Matrix must be symmetric");
    ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_LOWER_TRIANGULAR:
    aA->ignore_ltriangular = flg;
    break;
  case MAT_ERROR_LOWER_TRIANGULAR:
    aA->ignore_ltriangular = flg;
    break;
  case MAT_GETROW_UPPERTRIANGULAR:
    aA->getrow_utriangular = flg;
    break;
  default:
    SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatTranspose_MPISBAIJ"
PetscErrorCode MatTranspose_MPISBAIJ(Mat A,MatReuse reuse,Mat *B)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (reuse == MAT_INITIAL_MATRIX || *B != A) {
    ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDiagonalScale_MPISBAIJ"
PetscErrorCode MatDiagonalScale_MPISBAIJ(Mat mat,Vec ll,Vec rr)
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  Mat            a     = baij->A, b = baij->B;
  PetscErrorCode ierr;
  PetscInt       nv,m,n;
  PetscBool      flg;

  PetscFunctionBegin;
  if (ll != rr) {
    ierr = VecEqual(ll,rr,&flg);CHKERRQ(ierr);
    if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be the same");
  }
  if (!ll) PetscFunctionReturn(0);

  ierr = MatGetLocalSize(mat,&m,&n);CHKERRQ(ierr);
  if (m != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"For symmetric format, local size %d %d must be same",m,n);

  ierr = VecGetLocalSize(rr,&nv);CHKERRQ(ierr);
  if (nv != n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left and right vector non-conforming local size");

  ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

  /* left diagonalscale the off-diagonal part */
  ierr = (*b->ops->diagonalscale)(b,ll,NULL);CHKERRQ(ierr);

  /* scale the diagonal part */
  ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);

  /* right diagonalscale the off-diagonal part */
  ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*b->ops->diagonalscale)(b,NULL,baij->lvec);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetUnfactored_MPISBAIJ"
PetscErrorCode MatSetUnfactored_MPISBAIJ(Mat A)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDuplicate_MPISBAIJ(Mat,MatDuplicateOption,Mat*);

#undef __FUNCT__
#define __FUNCT__ "MatEqual_MPISBAIJ"
PetscErrorCode MatEqual_MPISBAIJ(Mat A,Mat B,PetscBool *flag)
{
  Mat_MPISBAIJ   *matB = (Mat_MPISBAIJ*)B->data,*matA = (Mat_MPISBAIJ*)A->data;
  Mat            a,b,c,d;
  PetscBool      flg;
  PetscErrorCode ierr;
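/* (MatEqual compares the A and B parts rank-locally, then MPI_Allreduce with
   MPI_LAND combines the per-rank results; added clarifying note, not from the
   original source) */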
PetscFunctionBegin; 1611 a = matA->A; b = matA->B; 1612 c = matB->A; d = matB->B; 1613 1614 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1615 if (flg) { 1616 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1617 } 1618 ierr = MPI_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 1619 PetscFunctionReturn(0); 1620 } 1621 1622 #undef __FUNCT__ 1623 #define __FUNCT__ "MatCopy_MPISBAIJ" 1624 PetscErrorCode MatCopy_MPISBAIJ(Mat A,Mat B,MatStructure str) 1625 { 1626 PetscErrorCode ierr; 1627 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1628 Mat_MPISBAIJ *b = (Mat_MPISBAIJ*)B->data; 1629 1630 PetscFunctionBegin; 1631 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 1632 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 1633 ierr = MatGetRowUpperTriangular(A);CHKERRQ(ierr); 1634 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1635 ierr = MatRestoreRowUpperTriangular(A);CHKERRQ(ierr); 1636 } else { 1637 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 1638 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 1639 } 1640 PetscFunctionReturn(0); 1641 } 1642 1643 #undef __FUNCT__ 1644 #define __FUNCT__ "MatSetUp_MPISBAIJ" 1645 PetscErrorCode MatSetUp_MPISBAIJ(Mat A) 1646 { 1647 PetscErrorCode ierr; 1648 1649 PetscFunctionBegin; 1650 ierr = MatMPISBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1651 PetscFunctionReturn(0); 1652 } 1653 1654 #undef __FUNCT__ 1655 #define __FUNCT__ "MatAXPY_MPISBAIJ" 1656 PetscErrorCode MatAXPY_MPISBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 1657 { 1658 PetscErrorCode ierr; 1659 Mat_MPISBAIJ *xx=(Mat_MPISBAIJ*)X->data,*yy=(Mat_MPISBAIJ*)Y->data; 1660 PetscBLASInt bnz,one=1; 1661 Mat_SeqSBAIJ *xa,*ya; 1662 Mat_SeqBAIJ *xb,*yb; 1663 1664 PetscFunctionBegin; 1665 if (str == SAME_NONZERO_PATTERN) { 1666 PetscScalar alpha = a; 1667 xa = (Mat_SeqSBAIJ*)xx->A->data; 1668 ya = (Mat_SeqSBAIJ*)yy->A->data; 1669 ierr = PetscBLASIntCast(xa->nz,&bnz);CHKERRQ(ierr); 1670 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,xa->a,&one,ya->a,&one)); 1671 xb = (Mat_SeqBAIJ*)xx->B->data; 1672 yb = (Mat_SeqBAIJ*)yy->B->data; 1673 ierr = PetscBLASIntCast(xb->nz,&bnz);CHKERRQ(ierr); 1674 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,xb->a,&one,yb->a,&one)); 1675 ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr); 1676 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 1677 ierr = MatSetOption(X,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE);CHKERRQ(ierr); 1678 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 1679 ierr = MatSetOption(X,MAT_GETROW_UPPERTRIANGULAR,PETSC_FALSE);CHKERRQ(ierr); 1680 } else { 1681 Mat B; 1682 PetscInt *nnz_d,*nnz_o,bs=Y->rmap->bs; 1683 if (bs != X->rmap->bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrices must have same block size"); 1684 ierr = MatGetRowUpperTriangular(X);CHKERRQ(ierr); 1685 ierr = MatGetRowUpperTriangular(Y);CHKERRQ(ierr); 1686 ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr); 1687 ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr); 1688 ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr); 1689 ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 1690 ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr); 1691 ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr); 1692 ierr = MatSetType(B,MATMPISBAIJ);CHKERRQ(ierr); 1693 ierr = MatAXPYGetPreallocation_SeqSBAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr); 1694 ierr 
= MatAXPYGetPreallocation_MPIBAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr); 1695 ierr = MatMPISBAIJSetPreallocation(B,bs,0,nnz_d,0,nnz_o);CHKERRQ(ierr); 1696 ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 1697 ierr = MatHeaderReplace(Y,B);CHKERRQ(ierr); 1698 ierr = PetscFree(nnz_d);CHKERRQ(ierr); 1699 ierr = PetscFree(nnz_o);CHKERRQ(ierr); 1700 ierr = MatRestoreRowUpperTriangular(X);CHKERRQ(ierr); 1701 ierr = MatRestoreRowUpperTriangular(Y);CHKERRQ(ierr); 1702 } 1703 PetscFunctionReturn(0); 1704 } 1705 1706 #undef __FUNCT__ 1707 #define __FUNCT__ "MatGetSubMatrices_MPISBAIJ" 1708 PetscErrorCode MatGetSubMatrices_MPISBAIJ(Mat A,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat *B[]) 1709 { 1710 PetscErrorCode ierr; 1711 PetscInt i; 1712 PetscBool flg,sorted; 1713 1714 PetscFunctionBegin; 1715 for (i = 0; i < n; i++) { 1716 ierr = ISSorted(irow[i],&sorted);CHKERRQ(ierr); 1717 if (!sorted) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row index set %d not sorted",i); 1718 ierr = ISSorted(icol[i],&sorted);CHKERRQ(ierr); 1719 if (!sorted) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Column index set %d not sorted",i); 1720 } 1721 ierr = MatGetSubMatrices_MPIBAIJ(A,n,irow,icol,scall,B);CHKERRQ(ierr); 1722 for (i=0; i<n; i++) { 1723 ierr = ISEqual(irow[i],icol[i],&flg);CHKERRQ(ierr); 1724 if (!flg) { /* *B[i] is non-symmetric, set flag */ 1725 ierr = MatSetOption(*B[i],MAT_SYMMETRIC,PETSC_FALSE);CHKERRQ(ierr); 1726 } 1727 } 1728 PetscFunctionReturn(0); 1729 } 1730 1731 #undef __FUNCT__ 1732 #define __FUNCT__ "MatShift_MPISBAIJ" 1733 PetscErrorCode MatShift_MPISBAIJ(Mat Y,PetscScalar a) 1734 { 1735 PetscErrorCode ierr; 1736 Mat_MPISBAIJ *maij = (Mat_MPISBAIJ*)Y->data; 1737 Mat_SeqSBAIJ *aij = (Mat_SeqSBAIJ*)maij->A->data,*bij = (Mat_SeqSBAIJ*)maij->B->data; 1738 1739 PetscFunctionBegin; 1740 if (!aij->nz && !bij->nz) { 1741 ierr = MatMPISBAIJSetPreallocation(Y,Y->rmap->bs,1,NULL,0,NULL);CHKERRQ(ierr); 1742 } 1743 ierr = MatShift_Basic(Y,a);CHKERRQ(ierr); 1744 PetscFunctionReturn(0); 1745 } 1746 1747 /* -------------------------------------------------------------------*/ 1748 static struct _MatOps MatOps_Values = {MatSetValues_MPISBAIJ, 1749 MatGetRow_MPISBAIJ, 1750 MatRestoreRow_MPISBAIJ, 1751 MatMult_MPISBAIJ, 1752 /* 4*/ MatMultAdd_MPISBAIJ, 1753 MatMult_MPISBAIJ, /* transpose versions are same as non-transpose */ 1754 MatMultAdd_MPISBAIJ, 1755 0, 1756 0, 1757 0, 1758 /* 10*/ 0, 1759 0, 1760 0, 1761 MatSOR_MPISBAIJ, 1762 MatTranspose_MPISBAIJ, 1763 /* 15*/ MatGetInfo_MPISBAIJ, 1764 MatEqual_MPISBAIJ, 1765 MatGetDiagonal_MPISBAIJ, 1766 MatDiagonalScale_MPISBAIJ, 1767 MatNorm_MPISBAIJ, 1768 /* 20*/ MatAssemblyBegin_MPISBAIJ, 1769 MatAssemblyEnd_MPISBAIJ, 1770 MatSetOption_MPISBAIJ, 1771 MatZeroEntries_MPISBAIJ, 1772 /* 24*/ 0, 1773 0, 1774 0, 1775 0, 1776 0, 1777 /* 29*/ MatSetUp_MPISBAIJ, 1778 0, 1779 0, 1780 0, 1781 0, 1782 /* 34*/ MatDuplicate_MPISBAIJ, 1783 0, 1784 0, 1785 0, 1786 0, 1787 /* 39*/ MatAXPY_MPISBAIJ, 1788 MatGetSubMatrices_MPISBAIJ, 1789 MatIncreaseOverlap_MPISBAIJ, 1790 MatGetValues_MPISBAIJ, 1791 MatCopy_MPISBAIJ, 1792 /* 44*/ 0, 1793 MatScale_MPISBAIJ, 1794 MatShift_MPISBAIJ, 1795 0, 1796 0, 1797 /* 49*/ 0, 1798 0, 1799 0, 1800 0, 1801 0, 1802 /* 54*/ 0, 1803 0, 1804 MatSetUnfactored_MPISBAIJ, 1805 0, 1806 MatSetValuesBlocked_MPISBAIJ, 1807 /* 59*/ MatGetSubMatrix_MPISBAIJ, 1808 0, 1809 0, 1810 0, 1811 0, 1812 /* 64*/ 0, 1813 0, 1814 0, 1815 0, 1816 0, 1817 /* 69*/ MatGetRowMaxAbs_MPISBAIJ, 1818 0, 1819 0, 1820 0, 1821 
0, 1822 /* 74*/ 0, 1823 0, 1824 0, 1825 0, 1826 0, 1827 /* 79*/ 0, 1828 0, 1829 0, 1830 0, 1831 MatLoad_MPISBAIJ, 1832 /* 84*/ 0, 1833 0, 1834 0, 1835 0, 1836 0, 1837 /* 89*/ 0, 1838 0, 1839 0, 1840 0, 1841 0, 1842 /* 94*/ 0, 1843 0, 1844 0, 1845 0, 1846 0, 1847 /* 99*/ 0, 1848 0, 1849 0, 1850 0, 1851 0, 1852 /*104*/ 0, 1853 MatRealPart_MPISBAIJ, 1854 MatImaginaryPart_MPISBAIJ, 1855 MatGetRowUpperTriangular_MPISBAIJ, 1856 MatRestoreRowUpperTriangular_MPISBAIJ, 1857 /*109*/ 0, 1858 0, 1859 0, 1860 0, 1861 0, 1862 /*114*/ 0, 1863 0, 1864 0, 1865 0, 1866 0, 1867 /*119*/ 0, 1868 0, 1869 0, 1870 0, 1871 0, 1872 /*124*/ 0, 1873 0, 1874 0, 1875 0, 1876 0, 1877 /*129*/ 0, 1878 0, 1879 0, 1880 0, 1881 0, 1882 /*134*/ 0, 1883 0, 1884 0, 1885 0, 1886 0, 1887 /*139*/ 0, 1888 0, 1889 0, 1890 0, 1891 0, 1892 /*144*/MatCreateMPIMatConcatenateSeqMat_MPISBAIJ 1893 }; 1894 1895 #undef __FUNCT__ 1896 #define __FUNCT__ "MatGetDiagonalBlock_MPISBAIJ" 1897 PetscErrorCode MatGetDiagonalBlock_MPISBAIJ(Mat A,Mat *a) 1898 { 1899 PetscFunctionBegin; 1900 *a = ((Mat_MPISBAIJ*)A->data)->A; 1901 PetscFunctionReturn(0); 1902 } 1903 1904 #undef __FUNCT__ 1905 #define __FUNCT__ "MatMPISBAIJSetPreallocation_MPISBAIJ" 1906 PetscErrorCode MatMPISBAIJSetPreallocation_MPISBAIJ(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt *d_nnz,PetscInt o_nz,const PetscInt *o_nnz) 1907 { 1908 Mat_MPISBAIJ *b; 1909 PetscErrorCode ierr; 1910 PetscInt i,mbs,Mbs; 1911 1912 PetscFunctionBegin; 1913 ierr = MatSetBlockSize(B,PetscAbs(bs));CHKERRQ(ierr); 1914 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 1915 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 1916 ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr); 1917 1918 b = (Mat_MPISBAIJ*)B->data; 1919 mbs = B->rmap->n/bs; 1920 Mbs = B->rmap->N/bs; 1921 if (mbs*bs != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number of local rows %D must be divisible by blocksize %D",B->rmap->n,bs); 1922 1923 B->rmap->bs = bs; 1924 b->bs2 = bs*bs; 1925 b->mbs = mbs; 1926 b->Mbs = Mbs; 1927 b->nbs = B->cmap->n/bs; 1928 b->Nbs = B->cmap->N/bs; 1929 1930 for (i=0; i<=b->size; i++) { 1931 b->rangebs[i] = B->rmap->range[i]/bs; 1932 } 1933 b->rstartbs = B->rmap->rstart/bs; 1934 b->rendbs = B->rmap->rend/bs; 1935 1936 b->cstartbs = B->cmap->rstart/bs; 1937 b->cendbs = B->cmap->rend/bs; 1938 1939 if (!B->preallocated) { 1940 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 1941 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 1942 ierr = MatSetType(b->A,MATSEQSBAIJ);CHKERRQ(ierr); 1943 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr); 1944 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 1945 ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 1946 ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr); 1947 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr); 1948 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),bs,&B->bstash);CHKERRQ(ierr); 1949 } 1950 1951 ierr = MatSeqSBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr); 1952 ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr); 1953 1954 B->preallocated = PETSC_TRUE; 1955 PetscFunctionReturn(0); 1956 } 1957 1958 #undef __FUNCT__ 1959 #define __FUNCT__ "MatMPISBAIJSetPreallocationCSR_MPISBAIJ" 1960 PetscErrorCode MatMPISBAIJSetPreallocationCSR_MPISBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[]) 1961 { 1962 PetscInt m,rstart,cstart,cend; 1963 PetscInt
i,j,d,nz,nz_max=0,*d_nnz=0,*o_nnz=0; 1964 const PetscInt *JJ =0; 1965 PetscScalar *values=0; 1966 PetscErrorCode ierr; 1967 1968 PetscFunctionBegin; 1969 if (bs < 1) SETERRQ1(PetscObjectComm((PetscObject)B),PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs); 1970 ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 1971 ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 1972 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 1973 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 1974 ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr); 1975 m = B->rmap->n/bs; 1976 rstart = B->rmap->rstart/bs; 1977 cstart = B->cmap->rstart/bs; 1978 cend = B->cmap->rend/bs; 1979 1980 if (ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %D",ii[0]); 1981 ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr); 1982 for (i=0; i<m; i++) { 1983 nz = ii[i+1] - ii[i]; 1984 if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nz); 1985 nz_max = PetscMax(nz_max,nz); 1986 JJ = jj + ii[i]; 1987 for (j=0; j<nz; j++) { 1988 if (*JJ >= cstart) break; 1989 JJ++; 1990 } 1991 d = 0; 1992 for (; j<nz; j++) { 1993 if (*JJ++ >= cend) break; 1994 d++; 1995 } 1996 d_nnz[i] = d; 1997 o_nnz[i] = nz - d; 1998 } 1999 ierr = MatMPISBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 2000 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 2001 2002 values = (PetscScalar*)V; 2003 if (!values) { 2004 ierr = PetscMalloc1(bs*bs*nz_max,&values);CHKERRQ(ierr); 2005 ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr); 2006 } 2007 for (i=0; i<m; i++) { 2008 PetscInt row = i + rstart; 2009 PetscInt ncols = ii[i+1] - ii[i]; 2010 const PetscInt *icols = jj + ii[i]; 2011 const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0); 2012 ierr = MatSetValuesBlocked_MPISBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr); 2013 } 2014 2015 if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); } 2016 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2017 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2018 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 2019 PetscFunctionReturn(0); 2020 } 2021 2022 /*MC 2023 MATMPISBAIJ - MATMPISBAIJ = "mpisbaij" - A matrix type to be used for distributed symmetric sparse block matrices, 2024 based on block compressed sparse row format. Only the upper triangular portion of the "diagonal" portion of 2025 the matrix is stored. 2026 2027 For complex numbers by default this matrix is symmetric, NOT Hermitian symmetric. To make it Hermitian symmetric you 2028 can call MatSetOption(Mat, MAT_HERMITIAN); 2029 2030 Options Database Keys: 2031 . 
-mat_type mpisbaij - sets the matrix type to "mpisbaij" during a call to MatSetFromOptions() 2032 2033 Level: beginner 2034 2035 .seealso: MatCreateMPISBAIJ 2036 M*/ 2037 2038 PETSC_EXTERN PetscErrorCode MatConvert_MPISBAIJ_MPISBSTRM(Mat,MatType,MatReuse,Mat*); 2039 2040 #undef __FUNCT__ 2041 #define __FUNCT__ "MatCreate_MPISBAIJ" 2042 PETSC_EXTERN PetscErrorCode MatCreate_MPISBAIJ(Mat B) 2043 { 2044 Mat_MPISBAIJ *b; 2045 PetscErrorCode ierr; 2046 PetscBool flg = PETSC_FALSE; 2047 2048 PetscFunctionBegin; 2049 ierr = PetscNewLog(B,&b);CHKERRQ(ierr); 2050 B->data = (void*)b; 2051 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 2052 2053 B->ops->destroy = MatDestroy_MPISBAIJ; 2054 B->ops->view = MatView_MPISBAIJ; 2055 B->assembled = PETSC_FALSE; 2056 B->insertmode = NOT_SET_VALUES; 2057 2058 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr); 2059 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&b->size);CHKERRQ(ierr); 2060 2061 /* build local table of row and column ownerships */ 2062 ierr = PetscMalloc1(b->size+2,&b->rangebs);CHKERRQ(ierr); 2063 2064 /* build cache for off array entries formed */ 2065 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr); 2066 2067 b->donotstash = PETSC_FALSE; 2068 b->colmap = NULL; 2069 b->garray = NULL; 2070 b->roworiented = PETSC_TRUE; 2071 2072 /* stuff used in block assembly */ 2073 b->barray = 0; 2074 2075 /* stuff used for matrix vector multiply */ 2076 b->lvec = 0; 2077 b->Mvctx = 0; 2078 b->slvec0 = 0; 2079 b->slvec0b = 0; 2080 b->slvec1 = 0; 2081 b->slvec1a = 0; 2082 b->slvec1b = 0; 2083 b->sMvctx = 0; 2084 2085 /* stuff for MatGetRow() */ 2086 b->rowindices = 0; 2087 b->rowvalues = 0; 2088 b->getrowactive = PETSC_FALSE; 2089 2090 /* hash table stuff */ 2091 b->ht = 0; 2092 b->hd = 0; 2093 b->ht_size = 0; 2094 b->ht_flag = PETSC_FALSE; 2095 b->ht_fact = 0; 2096 b->ht_total_ct = 0; 2097 b->ht_insert_ct = 0; 2098 2099 /* stuff for MatGetSubMatrices_MPIBAIJ_local() */ 2100 b->ijonly = PETSC_FALSE; 2101 2102 b->in_loc = 0; 2103 b->v_loc = 0; 2104 b->n_loc = 0; 2105 2106 ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPISBAIJ);CHKERRQ(ierr); 2107 ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPISBAIJ);CHKERRQ(ierr); 2108 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPISBAIJ);CHKERRQ(ierr); 2109 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPISBAIJSetPreallocation_C",MatMPISBAIJSetPreallocation_MPISBAIJ);CHKERRQ(ierr); 2110 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPISBAIJSetPreallocationCSR_C",MatMPISBAIJSetPreallocationCSR_MPISBAIJ);CHKERRQ(ierr); 2111 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpisbaij_mpisbstrm_C",MatConvert_MPISBAIJ_MPISBSTRM);CHKERRQ(ierr); 2112 #if defined(PETSC_HAVE_ELEMENTAL) 2113 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpisbaij_elemental_C",MatConvert_MPISBAIJ_Elemental);CHKERRQ(ierr); 2114 #endif 2115 2116 B->symmetric = PETSC_TRUE; 2117 B->structurally_symmetric = PETSC_TRUE; 2118 B->symmetric_set = PETSC_TRUE; 2119 B->structurally_symmetric_set = PETSC_TRUE; 2120 2121 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPISBAIJ);CHKERRQ(ierr); 2122 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Options for loading MPISBAIJ matrix 1","Mat");CHKERRQ(ierr); 2123 ierr = PetscOptionsBool("-mat_use_hash_table","Use hash table to save 
memory in constructing matrix","MatSetOption",flg,&flg,NULL);CHKERRQ(ierr); 2124 if (flg) { 2125 PetscReal fact = 1.39; 2126 ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr); 2127 ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,NULL);CHKERRQ(ierr); 2128 if (fact <= 1.0) fact = 1.39; 2129 ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr); 2130 ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr); 2131 } 2132 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2133 PetscFunctionReturn(0); 2134 } 2135 2136 /*MC 2137 MATSBAIJ - MATSBAIJ = "sbaij" - A matrix type to be used for symmetric block sparse matrices. 2138 2139 This matrix type is identical to MATSEQSBAIJ when constructed with a single process communicator, 2140 and MATMPISBAIJ otherwise. 2141 2142 Options Database Keys: 2143 . -mat_type sbaij - sets the matrix type to "sbaij" during a call to MatSetFromOptions() 2144 2145 Level: beginner 2146 2147 .seealso: MatCreateMPISBAIJ,MATSEQSBAIJ,MATMPISBAIJ 2148 M*/ 2149 2150 #undef __FUNCT__ 2151 #define __FUNCT__ "MatMPISBAIJSetPreallocation" 2152 /*@C 2153 MatMPISBAIJSetPreallocation - For good matrix assembly performance 2154 the user should preallocate the matrix storage by setting the parameters 2155 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2156 performance can be increased by more than a factor of 50. 2157 2158 Collective on Mat 2159 2160 Input Parameters: 2161 + B - the matrix 2162 . bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row 2163 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs() 2164 . d_nz - number of block nonzeros per block row in diagonal portion of local 2165 submatrix (same for all local rows) 2166 . d_nnz - array containing the number of block nonzeros in the various block rows 2167 in the upper triangular and diagonal part of the diagonal portion of the local 2168 submatrix (possibly different for each block row) or NULL. If you plan to factor the matrix you must leave room 2169 for the diagonal entry and set a value even if it is zero. 2170 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 2171 submatrix (same for all local rows). 2172 - o_nnz - array containing the number of block nonzeros in the various block rows of the 2173 off-diagonal portion of the local submatrix that is right of the diagonal 2174 (possibly different for each block row) or NULL. 2175 2176 2177 Options Database Keys: 2178 . -mat_no_unroll - uses code that does not unroll the loops in the 2179 block calculations (much slower) 2180 . -mat_block_size - size of the blocks to use 2181 2182 Notes: 2183 2184 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 2185 then it must be used on all processors that share the object for that argument. 2186 2187 If the *_nnz parameter is given then the *_nz parameter is ignored 2188 2189 Storage Information: 2190 For a square global matrix we define each processor's diagonal portion 2191 to be its local rows and the corresponding columns (a square submatrix); 2192 each processor's off-diagonal portion encompasses the remainder of the 2193 local matrix (a rectangular submatrix).
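For example, a minimal preallocation sketch (the communicator comm and the per-block-row counts are assumptions chosen for illustration, not part of the interface):

.vb
Mat            A;
PetscErrorCode ierr;
PetscInt       bs = 2, mloc = 10;  /* 2x2 blocks; 10 local rows = 5 local block rows */
ierr = MatCreate(comm,&A);CHKERRQ(ierr);
ierr = MatSetSizes(A,mloc,mloc,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
/* 3 block nonzeros per block row in the d part, 1 in the o part */
ierr = MatMPISBAIJSetPreallocation(A,bs,3,NULL,1,NULL);CHKERRQ(ierr);
.ve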
2194 2195 The user can specify preallocated storage for the diagonal part of 2196 the local submatrix with either d_nz or d_nnz (not both). Set 2197 d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic 2198 memory allocation. Likewise, specify preallocated storage for the 2199 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 2200 2201 You can call MatGetInfo() to get information on how effective the preallocation was; 2202 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 2203 You can also run with the option -info and look for messages with the string 2204 malloc in them to see if additional memory allocation was needed. 2205 2206 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 2207 the figure below we depict these three local rows and all columns (0-11). 2208 2209 .vb 2210 0 1 2 3 4 5 6 7 8 9 10 11 2211 -------------------------- 2212 row 3 |. . . d d d o o o o o o 2213 row 4 |. . . d d d o o o o o o 2214 row 5 |. . . d d d o o o o o o 2215 -------------------------- 2216 .ve 2217 2218 Thus, any entries in the d locations are stored in the d (diagonal) 2219 submatrix, and any entries in the o locations are stored in the 2220 o (off-diagonal) submatrix. Note that the d matrix is stored in 2221 MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format. 2222 2223 Now d_nz should indicate the number of block nonzeros per row in the upper triangular 2224 plus the diagonal part of the d matrix, 2225 and o_nz should indicate the number of block nonzeros per row in the o matrix 2226 2227 In general, for PDE problems in which most nonzeros are near the diagonal, 2228 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 2229 or you will get TERRIBLE performance; see the users' manual chapter on 2230 matrices. 2231 2232 Level: intermediate 2233 2234 .keywords: matrix, block, aij, compressed row, sparse, parallel 2235 2236 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateBAIJ(), PetscSplitOwnership() 2237 @*/ 2238 PetscErrorCode MatMPISBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 2239 { 2240 PetscErrorCode ierr; 2241 2242 PetscFunctionBegin; 2243 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 2244 PetscValidType(B,1); 2245 PetscValidLogicalCollectiveInt(B,bs,2); 2246 ierr = PetscTryMethod(B,"MatMPISBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,bs,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr); 2247 PetscFunctionReturn(0); 2248 } 2249 2250 #undef __FUNCT__ 2251 #define __FUNCT__ "MatCreateSBAIJ" 2252 /*@C 2253 MatCreateSBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format 2254 (block compressed row). For good matrix assembly performance 2255 the user should preallocate the matrix storage by setting the parameters 2256 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2257 performance can be increased by more than a factor of 50. 2258 2259 Collective on MPI_Comm 2260 2261 Input Parameters: 2262 + comm - MPI communicator 2263 . bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row 2264 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs() 2265 . 
m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 2266 This value should be the same as the local size used in creating the 2267 y vector for the matrix-vector product y = Ax. 2268 . n - number of local columns (or PETSC_DECIDE to have calculated if N is given) 2269 This value should be the same as the local size used in creating the 2270 x vector for the matrix-vector product y = Ax. 2271 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 2272 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 2273 . d_nz - number of block nonzeros per block row in diagonal portion of local 2274 submatrix (same for all local rows) 2275 . d_nnz - array containing the number of block nonzeros in the various block rows 2276 in the upper triangular portion of the diagonal portion of the local 2277 submatrix (possibly different for each block row) or NULL. 2278 If you plan to factor the matrix you must leave room for the diagonal entry and 2279 set its value even if it is zero. 2280 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 2281 submatrix (same for all local rows). 2282 - o_nnz - array containing the number of block nonzeros in the various block rows of the 2283 off-diagonal portion of the local submatrix (possibly different for 2284 each block row) or NULL. 2285 2286 Output Parameter: 2287 . A - the matrix 2288 2289 Options Database Keys: 2290 . -mat_no_unroll - uses code that does not unroll the loops in the 2291 block calculations (much slower) 2292 . -mat_block_size - size of the blocks to use 2293 . -mat_mpi - use the parallel matrix data structures even on one processor 2294 (defaults to using SeqBAIJ format on one processor) 2295 2296 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 2297 MatXXXXSetPreallocation() paradigm instead of this routine directly. 2298 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 2299 2300 Notes: 2301 The number of rows and columns must be divisible by blocksize. 2302 This matrix type does not support complex Hermitian operations. 2303 2304 The user MUST specify either the local or global matrix dimensions 2305 (possibly both). 2306 2307 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 2308 then it must be used on all processors that share the object for that argument. 2309 2310 If the *_nnz parameter is given then the *_nz parameter is ignored 2311 2312 Storage Information: 2313 For a square global matrix we define each processor's diagonal portion 2314 to be its local rows and the corresponding columns (a square submatrix); 2315 each processor's off-diagonal portion encompasses the remainder of the 2316 local matrix (a rectangular submatrix). 2317 2318 The user can specify preallocated storage for the diagonal part of 2319 the local submatrix with either d_nz or d_nnz (not both). Set 2320 d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic 2321 memory allocation. Likewise, specify preallocated storage for the 2322 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 2323 2324 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 2325 the figure below we depict these three local rows and all columns (0-11). 2326 2327 .vb 2328 0 1 2 3 4 5 6 7 8 9 10 11 2329 -------------------------- 2330 row 3 |. . . d d d o o o o o o 2331 row 4 |. . . d d d o o o o o o 2332 row 5 |. . .
d d d o o o o o o 2333 -------------------------- 2334 .ve 2335 2336 Thus, any entries in the d locations are stored in the d (diagonal) 2337 submatrix, and any entries in the o locations are stored in the 2338 o (off-diagonal) submatrix. Note that the d matrix is stored in 2339 MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format. 2340 2341 Now d_nz should indicate the number of block nonzeros per row in the upper triangular 2342 plus the diagonal part of the d matrix, 2343 and o_nz should indicate the number of block nonzeros per row in the o matrix. 2344 In general, for PDE problems in which most nonzeros are near the diagonal, 2345 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 2346 or you will get TERRIBLE performance; see the users' manual chapter on 2347 matrices. 2348 2349 Level: intermediate 2350 2351 .keywords: matrix, block, aij, compressed row, sparse, parallel 2352 2353 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateBAIJ() 2354 @*/ 2355 2356 PetscErrorCode MatCreateSBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 2357 { 2358 PetscErrorCode ierr; 2359 PetscMPIInt size; 2360 2361 PetscFunctionBegin; 2362 ierr = MatCreate(comm,A);CHKERRQ(ierr); 2363 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 2364 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2365 if (size > 1) { 2366 ierr = MatSetType(*A,MATMPISBAIJ);CHKERRQ(ierr); 2367 ierr = MatMPISBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 2368 } else { 2369 ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr); 2370 ierr = MatSeqSBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr); 2371 } 2372 PetscFunctionReturn(0); 2373 } 2374 2375 2376 #undef __FUNCT__ 2377 #define __FUNCT__ "MatDuplicate_MPISBAIJ" 2378 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 2379 { 2380 Mat mat; 2381 Mat_MPISBAIJ *a,*oldmat = (Mat_MPISBAIJ*)matin->data; 2382 PetscErrorCode ierr; 2383 PetscInt len=0,nt,bs=matin->rmap->bs,mbs=oldmat->mbs; 2384 PetscScalar *array; 2385 2386 PetscFunctionBegin; 2387 *newmat = 0; 2388 2389 ierr = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr); 2390 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 2391 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 2392 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 2393 ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr); 2394 ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr); 2395 2396 mat->factortype = matin->factortype; 2397 mat->preallocated = PETSC_TRUE; 2398 mat->assembled = PETSC_TRUE; 2399 mat->insertmode = NOT_SET_VALUES; 2400 2401 a = (Mat_MPISBAIJ*)mat->data; 2402 a->bs2 = oldmat->bs2; 2403 a->mbs = oldmat->mbs; 2404 a->nbs = oldmat->nbs; 2405 a->Mbs = oldmat->Mbs; 2406 a->Nbs = oldmat->Nbs; 2407 2408 2409 a->size = oldmat->size; 2410 a->rank = oldmat->rank; 2411 a->donotstash = oldmat->donotstash; 2412 a->roworiented = oldmat->roworiented; 2413 a->rowindices = 0; 2414 a->rowvalues = 0; 2415 a->getrowactive = PETSC_FALSE; 2416 a->barray = 0; 2417 a->rstartbs = oldmat->rstartbs; 2418 a->rendbs = oldmat->rendbs; 2419 a->cstartbs = oldmat->cstartbs; 2420 a->cendbs = oldmat->cendbs; 2421 2422 /* hash table stuff */ 2423 a->ht = 0; 2424 a->hd = 0; 2425 a->ht_size = 0; 2426 a->ht_flag = oldmat->ht_flag; 2427 a->ht_fact 
= oldmat->ht_fact; 2428 a->ht_total_ct = 0; 2429 a->ht_insert_ct = 0; 2430 2431 ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+2)*sizeof(PetscInt));CHKERRQ(ierr); 2432 if (oldmat->colmap) { 2433 #if defined(PETSC_USE_CTABLE) 2434 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 2435 #else 2436 ierr = PetscMalloc1(a->Nbs,&a->colmap);CHKERRQ(ierr); 2437 ierr = PetscLogObjectMemory((PetscObject)mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr); 2438 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr); 2439 #endif 2440 } else a->colmap = 0; 2441 2442 if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) { 2443 ierr = PetscMalloc1(len,&a->garray);CHKERRQ(ierr); 2444 ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr); 2445 ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); 2446 } else a->garray = 0; 2447 2448 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)matin),matin->rmap->bs,&mat->bstash);CHKERRQ(ierr); 2449 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 2450 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr); 2451 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 2452 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr); 2453 2454 ierr = VecDuplicate(oldmat->slvec0,&a->slvec0);CHKERRQ(ierr); 2455 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0);CHKERRQ(ierr); 2456 ierr = VecDuplicate(oldmat->slvec1,&a->slvec1);CHKERRQ(ierr); 2457 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1);CHKERRQ(ierr); 2458 2459 ierr = VecGetLocalSize(a->slvec1,&nt);CHKERRQ(ierr); 2460 ierr = VecGetArray(a->slvec1,&array);CHKERRQ(ierr); 2461 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,bs*mbs,array,&a->slvec1a);CHKERRQ(ierr); 2462 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,nt-bs*mbs,array+bs*mbs,&a->slvec1b);CHKERRQ(ierr); 2463 ierr = VecRestoreArray(a->slvec1,&array);CHKERRQ(ierr); 2464 ierr = VecGetArray(a->slvec0,&array);CHKERRQ(ierr); 2465 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,nt-bs*mbs,array+bs*mbs,&a->slvec0b);CHKERRQ(ierr); 2466 ierr = VecRestoreArray(a->slvec0,&array);CHKERRQ(ierr); 2467 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0);CHKERRQ(ierr); 2468 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1);CHKERRQ(ierr); 2469 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0b);CHKERRQ(ierr); 2470 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1a);CHKERRQ(ierr); 2471 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1b);CHKERRQ(ierr); 2472 2473 /* ierr = VecScatterCopy(oldmat->sMvctx,&a->sMvctx); - not written yet, replaced by the lazy trick: */ 2474 ierr = PetscObjectReference((PetscObject)oldmat->sMvctx);CHKERRQ(ierr); 2475 a->sMvctx = oldmat->sMvctx; 2476 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->sMvctx);CHKERRQ(ierr); 2477 2478 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 2479 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr); 2480 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 2481 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr); 2482 ierr = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 2483 *newmat = mat; 2484 PetscFunctionReturn(0); 2485 } 2486 2487 #undef __FUNCT__ 2488 #define 
__FUNCT__ "MatLoad_MPISBAIJ" 2489 PetscErrorCode MatLoad_MPISBAIJ(Mat newmat,PetscViewer viewer) 2490 { 2491 PetscErrorCode ierr; 2492 PetscInt i,nz,j,rstart,rend; 2493 PetscScalar *vals,*buf; 2494 MPI_Comm comm; 2495 MPI_Status status; 2496 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag,*sndcounts = 0,*browners,maxnz,*rowners,mmbs; 2497 PetscInt header[4],*rowlengths = 0,M,N,m,*cols,*locrowlens; 2498 PetscInt *procsnz = 0,jj,*mycols,*ibuf; 2499 PetscInt bs = newmat->rmap->bs,Mbs,mbs,extra_rows; 2500 PetscInt *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount; 2501 PetscInt dcount,kmax,k,nzcount,tmp; 2502 int fd; 2503 2504 PetscFunctionBegin; 2505 /* force binary viewer to load .info file if it has not yet done so */ 2506 ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr); 2507 ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr); 2508 ierr = PetscOptionsBegin(comm,NULL,"Options for loading MPISBAIJ matrix 2","Mat");CHKERRQ(ierr); 2509 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr); 2510 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2511 if (bs < 0) bs = 1; 2512 2513 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2514 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2515 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2516 if (!rank) { 2517 ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr); 2518 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 2519 if (header[3] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as MPISBAIJ"); 2520 } 2521 2522 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 2523 M = header[1]; 2524 N = header[2]; 2525 2526 /* If global sizes are set, check if they are consistent with that given in the file */ 2527 if (newmat->rmap->N >= 0 && newmat->rmap->N != M) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%D) and input matrix has (%D)",newmat->rmap->N,M); 2528 if (newmat->cmap->N >= 0 && newmat->cmap->N != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%D) and input matrix has (%D)",newmat->cmap->N,N); 2529 2530 if (M != N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only do square matrices"); 2531 2532 /* 2533 This code adds extra rows to make sure the number of rows is 2534 divisible by the blocksize 2535 */ 2536 Mbs = M/bs; 2537 extra_rows = bs - M + bs*(Mbs); 2538 if (extra_rows == bs) extra_rows = 0; 2539 else Mbs++; 2540 if (extra_rows &&!rank) { 2541 ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr); 2542 } 2543 2544 /* determine ownership of all rows */ 2545 if (newmat->rmap->n < 0) { /* PETSC_DECIDE */ 2546 mbs = Mbs/size + ((Mbs % size) > rank); 2547 m = mbs*bs; 2548 } else { /* User Set */ 2549 m = newmat->rmap->n; 2550 mbs = m/bs; 2551 } 2552 ierr = PetscMalloc2(size+1,&rowners,size+1,&browners);CHKERRQ(ierr); 2553 ierr = PetscMPIIntCast(mbs,&mmbs);CHKERRQ(ierr); 2554 ierr = MPI_Allgather(&mmbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 2555 rowners[0] = 0; 2556 for (i=2; i<=size; i++) rowners[i] += rowners[i-1]; 2557 for (i=0; i<=size; i++) browners[i] = rowners[i]*bs; 2558 rstart = rowners[rank]; 2559 rend = rowners[rank+1]; 2560 2561 /* distribute row lengths to all processors */ 2562 ierr = PetscMalloc1((rend-rstart)*bs,&locrowlens);CHKERRQ(ierr); 2563 if (!rank) { 
2564 ierr = PetscMalloc1(M+extra_rows,&rowlengths);CHKERRQ(ierr); 2565 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 2566 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 2567 ierr = PetscMalloc1(size,&sndcounts);CHKERRQ(ierr); 2568 for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i]; 2569 ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr); 2570 ierr = PetscFree(sndcounts);CHKERRQ(ierr); 2571 } else { 2572 ierr = MPI_Scatterv(0,0,0,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr); 2573 } 2574 2575 if (!rank) { /* procs[0] */ 2576 /* calculate the number of nonzeros on each processor */ 2577 ierr = PetscMalloc1(size,&procsnz);CHKERRQ(ierr); 2578 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 2579 for (i=0; i<size; i++) { 2580 for (j=rowners[i]*bs; j< rowners[i+1]*bs; j++) { 2581 procsnz[i] += rowlengths[j]; 2582 } 2583 } 2584 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2585 2586 /* determine max buffer needed and allocate it */ 2587 maxnz = 0; 2588 for (i=0; i<size; i++) { 2589 maxnz = PetscMax(maxnz,procsnz[i]); 2590 } 2591 ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr); 2592 2593 /* read in my part of the matrix column indices */ 2594 nz = procsnz[0]; 2595 ierr = PetscMalloc1(nz,&ibuf);CHKERRQ(ierr); 2596 mycols = ibuf; 2597 if (size == 1) nz -= extra_rows; 2598 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 2599 if (size == 1) { 2600 for (i=0; i< extra_rows; i++) mycols[nz+i] = M+i; 2601 } 2602 2603 /* read in every ones (except the last) and ship off */ 2604 for (i=1; i<size-1; i++) { 2605 nz = procsnz[i]; 2606 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2607 ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2608 } 2609 /* read in the stuff for the last proc */ 2610 if (size != 1) { 2611 nz = procsnz[size-1] - extra_rows; /* the extra rows are not on the disk */ 2612 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2613 for (i=0; i<extra_rows; i++) cols[nz+i] = M+i; 2614 ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr); 2615 } 2616 ierr = PetscFree(cols);CHKERRQ(ierr); 2617 } else { /* procs[i], i>0 */ 2618 /* determine buffer space needed for message */ 2619 nz = 0; 2620 for (i=0; i<m; i++) nz += locrowlens[i]; 2621 ierr = PetscMalloc1(nz,&ibuf);CHKERRQ(ierr); 2622 mycols = ibuf; 2623 /* receive message of column indices*/ 2624 ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 2625 ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr); 2626 if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2627 } 2628 2629 /* loop over local rows, determining number of off diagonal entries */ 2630 ierr = PetscMalloc2(rend-rstart,&dlens,rend-rstart,&odlens);CHKERRQ(ierr); 2631 ierr = PetscMalloc3(Mbs,&mask,Mbs,&masked1,Mbs,&masked2);CHKERRQ(ierr); 2632 ierr = PetscMemzero(mask,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 2633 ierr = PetscMemzero(masked1,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 2634 ierr = PetscMemzero(masked2,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 2635 rowcount = 0; 2636 nzcount = 0; 2637 for (i=0; i<mbs; i++) { 2638 dcount = 0; 2639 odcount = 0; 2640 for (j=0; j<bs; j++) { 2641 kmax = locrowlens[rowcount]; 2642 for (k=0; k<kmax; k++) { 2643 tmp = mycols[nzcount++]/bs; /* block col. 
index */ 2644 if (!mask[tmp]) { 2645 mask[tmp] = 1; 2646 if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; /* entry in off-diag portion */ 2647 else masked1[dcount++] = tmp; /* entry in diag portion */ 2648 } 2649 } 2650 rowcount++; 2651 } 2652 2653 dlens[i] = dcount; /* d_nzz[i] */ 2654 odlens[i] = odcount; /* o_nzz[i] */ 2655 2656 /* zero out the mask elements we set */ 2657 for (j=0; j<dcount; j++) mask[masked1[j]] = 0; 2658 for (j=0; j<odcount; j++) mask[masked2[j]] = 0; 2659 } 2660 ierr = MatSetSizes(newmat,m,m,M+extra_rows,N+extra_rows);CHKERRQ(ierr); 2661 ierr = MatMPISBAIJSetPreallocation(newmat,bs,0,dlens,0,odlens);CHKERRQ(ierr); 2662 ierr = MatSetOption(newmat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE);CHKERRQ(ierr); 2663 2664 if (!rank) { 2665 ierr = PetscMalloc1(maxnz,&buf);CHKERRQ(ierr); 2666 /* read in my part of the matrix numerical values */ 2667 nz = procsnz[0]; 2668 vals = buf; 2669 mycols = ibuf; 2670 if (size == 1) nz -= extra_rows; 2671 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2672 if (size == 1) { 2673 for (i=0; i< extra_rows; i++) vals[nz+i] = 1.0; 2674 } 2675 2676 /* insert into matrix */ 2677 jj = rstart*bs; 2678 for (i=0; i<m; i++) { 2679 ierr = MatSetValues(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 2680 mycols += locrowlens[i]; 2681 vals += locrowlens[i]; 2682 jj++; 2683 } 2684 2685 /* read in other processors (except the last one) and ship out */ 2686 for (i=1; i<size-1; i++) { 2687 nz = procsnz[i]; 2688 vals = buf; 2689 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2690 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr); 2691 } 2692 /* the last proc */ 2693 if (size != 1) { 2694 nz = procsnz[i] - extra_rows; 2695 vals = buf; 2696 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2697 for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0; 2698 ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr); 2699 } 2700 ierr = PetscFree(procsnz);CHKERRQ(ierr); 2701 2702 } else { 2703 /* receive numeric values */ 2704 ierr = PetscMalloc1(nz,&buf);CHKERRQ(ierr); 2705 2706 /* receive message of values*/ 2707 vals = buf; 2708 mycols = ibuf; 2709 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm,&status);CHKERRQ(ierr); 2710 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 2711 if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2712 2713 /* insert into matrix */ 2714 jj = rstart*bs; 2715 for (i=0; i<m; i++) { 2716 ierr = MatSetValues_MPISBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 2717 mycols += locrowlens[i]; 2718 vals += locrowlens[i]; 2719 jj++; 2720 } 2721 } 2722 2723 ierr = PetscFree(locrowlens);CHKERRQ(ierr); 2724 ierr = PetscFree(buf);CHKERRQ(ierr); 2725 ierr = PetscFree(ibuf);CHKERRQ(ierr); 2726 ierr = PetscFree2(rowners,browners);CHKERRQ(ierr); 2727 ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr); 2728 ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr); 2729 ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2730 ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2731 PetscFunctionReturn(0); 2732 } 2733 2734 #undef __FUNCT__ 2735 #define __FUNCT__ "MatMPISBAIJSetHashTableFactor" 2736 /*XXXXX@ 2737 MatMPISBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable. 2738 2739 Input Parameters: 2740 . mat - the matrix 2741 . 
fact - factor 2742 2743 Not Collective on Mat, each process can have a different hash factor 2744 2745 Level: advanced 2746 2747 Notes: 2748 This can also be set by the command line option: -mat_use_hash_table fact 2749 2750 .keywords: matrix, hashtable, factor, HT 2751 2752 .seealso: MatSetOption() 2753 @XXXXX*/ 2754 2755 2756 #undef __FUNCT__ 2757 #define __FUNCT__ "MatGetRowMaxAbs_MPISBAIJ" 2758 PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat A,Vec v,PetscInt idx[]) 2759 { 2760 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 2761 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(a->B)->data; 2762 PetscReal atmp; 2763 PetscReal *work,*svalues,*rvalues; 2764 PetscErrorCode ierr; 2765 PetscInt i,bs,mbs,*bi,*bj,brow,j,ncols,krow,kcol,col,row,Mbs,bcol; 2766 PetscMPIInt rank,size; 2767 PetscInt *rowners_bs,dest,count,source; 2768 PetscScalar *va; 2769 MatScalar *ba; 2770 MPI_Status stat; 2771 2772 PetscFunctionBegin; 2773 if (idx) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Send email to petsc-maint@mcs.anl.gov"); 2774 ierr = MatGetRowMaxAbs(a->A,v,NULL);CHKERRQ(ierr); 2775 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2776 2777 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr); 2778 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr); 2779 2780 bs = A->rmap->bs; 2781 mbs = a->mbs; 2782 Mbs = a->Mbs; 2783 ba = b->a; 2784 bi = b->i; 2785 bj = b->j; 2786 2787 /* find ownerships */ 2788 rowners_bs = A->rmap->range; 2789 2790 /* each proc creates an array to be distributed */ 2791 ierr = PetscMalloc1(bs*Mbs,&work);CHKERRQ(ierr); 2792 ierr = PetscMemzero(work,bs*Mbs*sizeof(PetscReal));CHKERRQ(ierr); 2793 2794 /* row_max for B */ 2795 if (rank != size-1) { 2796 for (i=0; i<mbs; i++) { 2797 ncols = bi[1] - bi[0]; bi++; 2798 brow = bs*i; 2799 for (j=0; j<ncols; j++) { 2800 bcol = bs*(*bj); 2801 for (kcol=0; kcol<bs; kcol++) { 2802 col = bcol + kcol; /* local col index */ 2803 col += rowners_bs[rank+1]; /* global col index */ 2804 for (krow=0; krow<bs; krow++) { 2805 atmp = PetscAbsScalar(*ba); ba++; 2806 row = brow + krow; /* local row index */ 2807 if (PetscRealPart(va[row]) < atmp) va[row] = atmp; 2808 if (work[col] < atmp) work[col] = atmp; 2809 } 2810 } 2811 bj++; 2812 } 2813 } 2814 2815 /* send values to its owners */ 2816 for (dest=rank+1; dest<size; dest++) { 2817 svalues = work + rowners_bs[dest]; 2818 count = rowners_bs[dest+1]-rowners_bs[dest]; 2819 ierr = MPI_Send(svalues,count,MPIU_REAL,dest,rank,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2820 } 2821 } 2822 2823 /* receive values */ 2824 if (rank) { 2825 rvalues = work; 2826 count = rowners_bs[rank+1]-rowners_bs[rank]; 2827 for (source=0; source<rank; source++) { 2828 ierr = MPI_Recv(rvalues,count,MPIU_REAL,MPI_ANY_SOURCE,MPI_ANY_TAG,PetscObjectComm((PetscObject)A),&stat);CHKERRQ(ierr); 2829 /* process values */ 2830 for (i=0; i<count; i++) { 2831 if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i]; 2832 } 2833 } 2834 } 2835 2836 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2837 ierr = PetscFree(work);CHKERRQ(ierr); 2838 PetscFunctionReturn(0); 2839 } 2840 2841 #undef __FUNCT__ 2842 #define __FUNCT__ "MatSOR_MPISBAIJ" 2843 PetscErrorCode MatSOR_MPISBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 2844 { 2845 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data; 2846 PetscErrorCode ierr; 2847 PetscInt mbs=mat->mbs,bs=matin->rmap->bs; 2848 PetscScalar *x,*ptr,*from; 2849 Vec bb1; 2850 const PetscScalar *b; 2851 2852 PetscFunctionBegin; 2853 if (its <= 0 || lits <= 0) 
SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 2854 if (bs > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented"); 2855 2856 if (flag == SOR_APPLY_UPPER) { 2857 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2858 PetscFunctionReturn(0); 2859 } 2860 2861 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 2862 if (flag & SOR_ZERO_INITIAL_GUESS) { 2863 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr); 2864 its--; 2865 } 2866 2867 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 2868 while (its--) { 2869 2870 /* lower triangular part: slvec0b = - B^T*xx */ 2871 ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr); 2872 2873 /* copy xx into slvec0a */ 2874 ierr = VecGetArray(mat->slvec0,&ptr);CHKERRQ(ierr); 2875 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 2876 ierr = PetscMemcpy(ptr,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 2877 ierr = VecRestoreArray(mat->slvec0,&ptr);CHKERRQ(ierr); 2878 2879 ierr = VecScale(mat->slvec0,-1.0);CHKERRQ(ierr); 2880 2881 /* copy bb into slvec1a */ 2882 ierr = VecGetArray(mat->slvec1,&ptr);CHKERRQ(ierr); 2883 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 2884 ierr = PetscMemcpy(ptr,b,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 2885 ierr = VecRestoreArray(mat->slvec1,&ptr);CHKERRQ(ierr); 2886 2887 /* set slvec1b = 0 */ 2888 ierr = VecSet(mat->slvec1b,0.0);CHKERRQ(ierr); 2889 2890 ierr = VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2891 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 2892 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 2893 ierr = VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2894 2895 /* upper triangular part: bb1 = bb1 - B*x */ 2896 ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,bb1);CHKERRQ(ierr); 2897 2898 /* local diagonal sweep */ 2899 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr); 2900 } 2901 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 2902 } else if ((flag & SOR_LOCAL_FORWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) { 2903 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2904 } else if ((flag & SOR_LOCAL_BACKWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) { 2905 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2906 } else if (flag & SOR_EISENSTAT) { 2907 Vec xx1; 2908 PetscBool hasop; 2909 const PetscScalar *diag; 2910 PetscScalar *sl,scale = (omega - 2.0)/omega; 2911 PetscInt i,n; 2912 2913 if (!mat->xx1) { 2914 ierr = VecDuplicate(bb,&mat->xx1);CHKERRQ(ierr); 2915 ierr = VecDuplicate(bb,&mat->bb1);CHKERRQ(ierr); 2916 } 2917 xx1 = mat->xx1; 2918 bb1 = mat->bb1; 2919 2920 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr); 2921 2922 if (!mat->diag) { 2923 /* this is wrong for same matrix with new nonzero values */ 2924 ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr); 2925 ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr); 2926 } 2927 ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr); 2928 2929 if (hasop) { 2930 ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr); 2931 ierr = VecAYPX(mat->slvec1a,scale,bb);CHKERRQ(ierr); 2932 } else { 2933 /* 2934 These 
two lines are replaced by code that may be a bit faster for a good compiler 2935 ierr = VecPointwiseMult(mat->slvec1a,mat->diag,xx);CHKERRQ(ierr); 2936 ierr = VecAYPX(mat->slvec1a,scale,bb);CHKERRQ(ierr); 2937 */ 2938 ierr = VecGetArray(mat->slvec1a,&sl);CHKERRQ(ierr); 2939 ierr = VecGetArrayRead(mat->diag,&diag);CHKERRQ(ierr); 2940 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 2941 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 2942 ierr = VecGetLocalSize(xx,&n);CHKERRQ(ierr); 2943 if (omega == 1.0) { 2944 for (i=0; i<n; i++) sl[i] = b[i] - diag[i]*x[i]; 2945 ierr = PetscLogFlops(2.0*n);CHKERRQ(ierr); 2946 } else { 2947 for (i=0; i<n; i++) sl[i] = b[i] + scale*diag[i]*x[i]; 2948 ierr = PetscLogFlops(3.0*n);CHKERRQ(ierr); 2949 } 2950 ierr = VecRestoreArray(mat->slvec1a,&sl);CHKERRQ(ierr); 2951 ierr = VecRestoreArrayRead(mat->diag,&diag);CHKERRQ(ierr); 2952 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 2953 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 2954 } 2955 2956 /* multiply off-diagonal portion of matrix */ 2957 ierr = VecSet(mat->slvec1b,0.0);CHKERRQ(ierr); 2958 ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr); 2959 ierr = VecGetArray(mat->slvec0,&from);CHKERRQ(ierr); 2960 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 2961 ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 2962 ierr = VecRestoreArray(mat->slvec0,&from);CHKERRQ(ierr); 2963 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 2964 ierr = VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2965 ierr = VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2966 ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,mat->slvec1a);CHKERRQ(ierr); 2967 2968 /* local sweep */ 2969 ierr = (*mat->A->ops->sor)(mat->A,mat->slvec1a,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr); 2970 ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr); 2971 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format"); 2972 PetscFunctionReturn(0); 2973 } 2974 2975 #undef __FUNCT__ 2976 #define __FUNCT__ "MatSOR_MPISBAIJ_2comm" 2977 PetscErrorCode MatSOR_MPISBAIJ_2comm(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 2978 { 2979 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data; 2980 PetscErrorCode ierr; 2981 Vec lvec1,bb1; 2982 2983 PetscFunctionBegin; 2984 if (its <= 0 || lits <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 2985 if (matin->rmap->bs > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented"); 2986 2987 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 2988 if (flag & SOR_ZERO_INITIAL_GUESS) { 2989 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr); 2990 its--; 2991 } 2992 2993 ierr = VecDuplicate(mat->lvec,&lvec1);CHKERRQ(ierr); 2994 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 2995 while (its--) { 2996 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2997 2998 /* lower diagonal part: bb1 = bb - B^T*xx */ 2999 ierr = (*mat->B->ops->multtranspose)(mat->B,xx,lvec1);CHKERRQ(ierr); 3000 ierr = VecScale(lvec1,-1.0);CHKERRQ(ierr); 3001 3002 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3003 ierr = VecCopy(bb,bb1);CHKERRQ(ierr); 3004 ierr = 
VecScatterBegin(mat->Mvctx,lvec1,bb1,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3005 3006 /* upper diagonal part: bb1 = bb1 - B*x */ 3007 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 3008 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr); 3009 3010 ierr = VecScatterEnd(mat->Mvctx,lvec1,bb1,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3011 3012 /* diagonal sweep */ 3013 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr); 3014 } 3015 ierr = VecDestroy(&lvec1);CHKERRQ(ierr); 3016 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 3017 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format"); 3018 PetscFunctionReturn(0); 3019 } 3020 3021 #undef __FUNCT__ 3022 #define __FUNCT__ "MatCreateMPISBAIJWithArrays" 3023 /*@ 3024 MatCreateMPISBAIJWithArrays - creates an MPI SBAIJ matrix using arrays that contain the local rows in standard 3025 CSR format. 3026 3027 Collective on MPI_Comm 3028 3029 Input Parameters: 3030 + comm - MPI communicator 3031 . bs - the block size, only a block size of 1 is supported 3032 . m - number of local rows (Cannot be PETSC_DECIDE) 3033 . n - This value should be the same as the local size used in creating the 3034 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 3035 calculated if N is given) For square matrices n is almost always m. 3036 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 3037 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 3038 . i - row indices 3039 . j - column indices 3040 - a - matrix values 3041 3042 Output Parameter: 3043 . mat - the matrix 3044 3045 Level: intermediate 3046 3047 Notes: 3048 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 3049 thus you CANNOT change the matrix entries by changing the values of a[] after you have 3050 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3051 3052 The i and j indices are 0 based, and the i indices are offsets into the local j array. 3053 3054 .keywords: matrix, aij, compressed row, sparse, parallel 3055 3056 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 3057 MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays() 3058 @*/ 3059 PetscErrorCode MatCreateMPISBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 3060 { 3061 PetscErrorCode ierr; 3062 3063 3064 PetscFunctionBegin; 3065 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 3066 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 3067 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3068 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 3069 ierr = MatSetType(*mat,MATMPISBAIJ);CHKERRQ(ierr); 3070 ierr = MatMPISBAIJSetPreallocationCSR(*mat,bs,i,j,a);CHKERRQ(ierr); 3071 PetscFunctionReturn(0); 3072 } 3073 3074 3075 #undef __FUNCT__ 3076 #define __FUNCT__ "MatMPISBAIJSetPreallocationCSR" 3077 /*@C 3078 MatMPISBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in SBAIJ format 3079 (symmetric block compressed row). 3080 3081 Collective on MPI_Comm 3082 3083 Input Parameters: 3084 + B - the matrix 3085 . bs - the block size 3086 .
#undef __FUNCT__
#define __FUNCT__ "MatMPISBAIJSetPreallocationCSR"
/*@C
   MatMPISBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in SBAIJ format.

   Collective on MPI_Comm

   Input Parameters:
+  B - the matrix
.  bs - the block size
.  i - the indices into j for the start of each local row (starts with zero)
.  j - the column indices for each local row (starts with zero); these must be sorted for each row
-  v - optional values in the matrix

   Level: developer

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ
@*/
PetscErrorCode MatMPISBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[],const PetscScalar v[])
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscTryMethod(B,"MatMPISBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
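/*
   Illustrative sketch (not part of the library): the create/set-type/preallocate
   sequence that MatCreateMPISBAIJWithArrays() wraps can also be written out by
   hand when more control is needed.  The CSR triple (i, j, v) is a hypothetical
   input describing only the upper triangular part; bs = 1 is assumed.  Guarded
   out with #if 0.
*/
#if 0
static PetscErrorCode ExamplePreallocateCSR(MPI_Comm comm,PetscInt m,PetscInt n,const PetscInt i[],const PetscInt j[],const PetscScalar v[],Mat *B)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatCreate(comm,B);CHKERRQ(ierr);
  ierr = MatSetSizes(*B,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
  ierr = MatSetType(*B,MATMPISBAIJ);CHKERRQ(ierr);
  ierr = MatMPISBAIJSetPreallocationCSR(*B,1,i,j,v);CHKERRQ(ierr); /* bs = 1 */
  PetscFunctionReturn(0);
}
#endif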
#undef __FUNCT__
#define __FUNCT__ "MatCreateMPIMatConcatenateSeqMat_MPISBAIJ"
PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPISBAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
{
  PetscErrorCode ierr;
  PetscInt       m,N,i,rstart,nnz,Ii,bs,cbs;
  PetscInt       *indx;
  PetscScalar    *values;

  PetscFunctionBegin;
  ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
  if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
    Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)inmat->data;
    PetscInt     *dnz,*onz,sum,bs,cbs,mbs,Nbs;
    PetscInt     *bindx,rmax=a->rmax,j;

    ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
    mbs = m/bs; Nbs = N/cbs;
    if (n == PETSC_DECIDE) {
      ierr = PetscSplitOwnership(comm,&n,&Nbs);CHKERRQ(ierr);
    }
    /* Check sum(n) = Nbs */
    ierr = MPI_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
    if (sum != Nbs) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %D",Nbs);

    ierr    = MPI_Scan(&mbs,&rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
    rstart -= mbs;

    ierr = PetscMalloc1(rmax,&bindx);CHKERRQ(ierr);
    ierr = MatPreallocateInitialize(comm,mbs,n,dnz,onz);CHKERRQ(ierr);
    ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);
    for (i=0; i<mbs; i++) {
      ierr = MatGetRow_SeqSBAIJ(inmat,i*bs,&nnz,&indx,NULL);CHKERRQ(ierr); /* non-blocked nnz and indx */
      nnz  = nnz/bs;
      for (j=0; j<nnz; j++) bindx[j] = indx[j*bs]/bs;
      ierr = MatPreallocateSet(i+rstart,nnz,bindx,dnz,onz);CHKERRQ(ierr);
      ierr = MatRestoreRow_SeqSBAIJ(inmat,i*bs,&nnz,&indx,NULL);CHKERRQ(ierr);
    }
    ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_FALSE);CHKERRQ(ierr);
    ierr = PetscFree(bindx);CHKERRQ(ierr);

    ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
    ierr = MatSetSizes(*outmat,m,n*bs,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
    ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
    ierr = MatSetType(*outmat,MATMPISBAIJ);CHKERRQ(ierr);
    ierr = MatMPISBAIJSetPreallocation(*outmat,bs,0,dnz,0,onz);CHKERRQ(ierr);
    ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
  }

  /* numeric phase */
  ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr);

  ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);
  for (i=0; i<m; i++) {
    ierr = MatGetRow_SeqSBAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
    Ii   = i + rstart;
    ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
    ierr = MatRestoreRow_SeqSBAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
  }
  ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_FALSE);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
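/*
   Illustrative sketch (not part of the library): the routine above is the
   MPISBAIJ implementation behind the generic dispatch routine
   MatCreateMPIMatConcatenateSeqMat(), which concatenates one sequential
   matrix per process into a single parallel matrix.  `seqmat` is a
   placeholder for a previously assembled MATSEQSBAIJ matrix with the same
   global column layout on every process.  Guarded out with #if 0.
*/
#if 0
static PetscErrorCode ExampleConcatenate(MPI_Comm comm,Mat seqmat,Mat *mpimat)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* PETSC_DECIDE lets PETSc split the (block) columns among the processes */
  ierr = MatCreateMPIMatConcatenateSeqMat(comm,seqmat,PETSC_DECIDE,MAT_INITIAL_MATRIX,mpimat);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif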