1 2 #include <../src/mat/impls/baij/mpi/mpibaij.h> /*I "petscmat.h" I*/ 3 #include <../src/mat/impls/sbaij/mpi/mpisbaij.h> 4 #include <../src/mat/impls/sbaij/seq/sbaij.h> 5 #include <petscblaslapack.h> 6 7 #if defined(PETSC_HAVE_ELEMENTAL) 8 PETSC_INTERN PetscErrorCode MatConvert_MPISBAIJ_Elemental(Mat,MatType,MatReuse,Mat*); 9 #endif 10 #undef __FUNCT__ 11 #define __FUNCT__ "MatStoreValues_MPISBAIJ" 12 PetscErrorCode MatStoreValues_MPISBAIJ(Mat mat) 13 { 14 Mat_MPISBAIJ *aij = (Mat_MPISBAIJ*)mat->data; 15 PetscErrorCode ierr; 16 17 PetscFunctionBegin; 18 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 19 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 20 PetscFunctionReturn(0); 21 } 22 23 #undef __FUNCT__ 24 #define __FUNCT__ "MatRetrieveValues_MPISBAIJ" 25 PetscErrorCode MatRetrieveValues_MPISBAIJ(Mat mat) 26 { 27 Mat_MPISBAIJ *aij = (Mat_MPISBAIJ*)mat->data; 28 PetscErrorCode ierr; 29 30 PetscFunctionBegin; 31 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 32 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 33 PetscFunctionReturn(0); 34 } 35 36 #define MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv,orow,ocol) \ 37 { \ 38 \ 39 brow = row/bs; \ 40 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 41 rmax = aimax[brow]; nrow = ailen[brow]; \ 42 bcol = col/bs; \ 43 ridx = row % bs; cidx = col % bs; \ 44 low = 0; high = nrow; \ 45 while (high-low > 3) { \ 46 t = (low+high)/2; \ 47 if (rp[t] > bcol) high = t; \ 48 else low = t; \ 49 } \ 50 for (_i=low; _i<high; _i++) { \ 51 if (rp[_i] > bcol) break; \ 52 if (rp[_i] == bcol) { \ 53 bap = ap + bs2*_i + bs*cidx + ridx; \ 54 if (addv == ADD_VALUES) *bap += value; \ 55 else *bap = value; \ 56 goto a_noinsert; \ 57 } \ 58 } \ 59 if (a->nonew == 1) goto a_noinsert; \ 60 if (a->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \ 61 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \ 62 N = nrow++ - 1; \ 63 /* shift up all the later entries in this row */ \ 64 for (ii=N; ii>=_i; ii--) { \ 65 rp[ii+1] = rp[ii]; \ 66 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 67 } \ 68 if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); } \ 69 rp[_i] = bcol; \ 70 ap[bs2*_i + bs*cidx + ridx] = value; \ 71 A->nonzerostate++;\ 72 a_noinsert:; \ 73 ailen[brow] = nrow; \ 74 } 75 76 #define MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv,orow,ocol) \ 77 { \ 78 brow = row/bs; \ 79 rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 80 rmax = bimax[brow]; nrow = bilen[brow]; \ 81 bcol = col/bs; \ 82 ridx = row % bs; cidx = col % bs; \ 83 low = 0; high = nrow; \ 84 while (high-low > 3) { \ 85 t = (low+high)/2; \ 86 if (rp[t] > bcol) high = t; \ 87 else low = t; \ 88 } \ 89 for (_i=low; _i<high; _i++) { \ 90 if (rp[_i] > bcol) break; \ 91 if (rp[_i] == bcol) { \ 92 bap = ap + bs2*_i + bs*cidx + ridx; \ 93 if (addv == ADD_VALUES) *bap += value; \ 94 else *bap = value; \ 95 goto b_noinsert; \ 96 } \ 97 } \ 98 if (b->nonew == 1) goto b_noinsert; \ 99 if (b->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \ 100 MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \ 101 N = nrow++ - 1; \ 102 /* shift up all the later entries in this row */ \ 103 for (ii=N; ii>=_i; ii--) { \ 104 rp[ii+1] = rp[ii]; \ 105 ierr = 
PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
    } \
    if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);} \
    rp[_i]                      = bcol; \
    ap[bs2*_i + bs*cidx + ridx] = value; \
    B->nonzerostate++;\
b_noinsert:; \
    bilen[brow] = nrow; \
  }

/* Only add/insert a(i,j) with i<=j (blocks).
   Any a(i,j) with i>j input by the user is ignored.
*/
#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPISBAIJ"
PetscErrorCode MatSetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  MatScalar      value;
  PetscBool      roworiented = baij->roworiented;
  PetscErrorCode ierr;
  PetscInt       i,j,row,col;
  PetscInt       rstart_orig=mat->rmap->rstart;
  PetscInt       rend_orig  =mat->rmap->rend,cstart_orig=mat->cmap->rstart;
  PetscInt       cend_orig  =mat->cmap->rend,bs=mat->rmap->bs;

  /* Some variables required in the macros above */
  Mat          A     = baij->A;
  Mat_SeqSBAIJ *a    = (Mat_SeqSBAIJ*)(A)->data;
  PetscInt     *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
  MatScalar    *aa   =a->a;

  Mat         B     = baij->B;
  Mat_SeqBAIJ *b    = (Mat_SeqBAIJ*)(B)->data;
  PetscInt    *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
  MatScalar   *ba   =b->a;

  PetscInt  *rp,ii,nrow,_i,rmax,N,brow,bcol;
  PetscInt  low,high,t,ridx,cidx,bs2=a->bs2;
  MatScalar *ap,*bap;

  /* for stash */
  PetscInt  n_loc, *in_loc = NULL;
  MatScalar *v_loc = NULL;

  PetscFunctionBegin;
  if (!baij->donotstash) {
    if (n > baij->n_loc) {
      ierr = PetscFree(baij->in_loc);CHKERRQ(ierr);
      ierr = PetscFree(baij->v_loc);CHKERRQ(ierr);
      ierr = PetscMalloc1(n,&baij->in_loc);CHKERRQ(ierr);
      ierr = PetscMalloc1(n,&baij->v_loc);CHKERRQ(ierr);

      baij->n_loc = n;
    }
    in_loc = baij->in_loc;
    v_loc  = baij->v_loc;
  }

  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
    if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */
      row = im[i] - rstart_orig;                     /* local row index */
      for (j=0; j<n; j++) {
        if (im[i]/bs > in[j]/bs) {
          if (a->ignore_ltriangular) {
            continue; /* ignore lower triangular blocks */
          } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. 
Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)"); 176 } 177 if (in[j] >= cstart_orig && in[j] < cend_orig) { /* diag entry (A) */ 178 col = in[j] - cstart_orig; /* local col index */ 179 brow = row/bs; bcol = col/bs; 180 if (brow > bcol) continue; /* ignore lower triangular blocks of A */ 181 if (roworiented) value = v[i*n+j]; 182 else value = v[i+j*m]; 183 MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv,im[i],in[j]); 184 /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 185 } else if (in[j] < 0) continue; 186 #if defined(PETSC_USE_DEBUG) 187 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 188 #endif 189 else { /* off-diag entry (B) */ 190 if (mat->was_assembled) { 191 if (!baij->colmap) { 192 ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 193 } 194 #if defined(PETSC_USE_CTABLE) 195 ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr); 196 col = col - 1; 197 #else 198 col = baij->colmap[in[j]/bs] - 1; 199 #endif 200 if (col < 0 && !((Mat_SeqSBAIJ*)(baij->A->data))->nonew) { 201 ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr); 202 col = in[j]; 203 /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */ 204 B = baij->B; 205 b = (Mat_SeqBAIJ*)(B)->data; 206 bimax= b->imax;bi=b->i;bilen=b->ilen;bj=b->j; 207 ba = b->a; 208 } else col += in[j]%bs; 209 } else col = in[j]; 210 if (roworiented) value = v[i*n+j]; 211 else value = v[i+j*m]; 212 MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv,im[i],in[j]); 213 /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 214 } 215 } 216 } else { /* off processor entry */ 217 if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 218 if (!baij->donotstash) { 219 mat->assembled = PETSC_FALSE; 220 n_loc = 0; 221 for (j=0; j<n; j++) { 222 if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */ 223 in_loc[n_loc] = in[j]; 224 if (roworiented) { 225 v_loc[n_loc] = v[i*n+j]; 226 } else { 227 v_loc[n_loc] = v[j*m+i]; 228 } 229 n_loc++; 230 } 231 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n_loc,in_loc,v_loc,PETSC_FALSE);CHKERRQ(ierr); 232 } 233 } 234 } 235 PetscFunctionReturn(0); 236 } 237 238 #undef __FUNCT__ 239 #define __FUNCT__ "MatSetValuesBlocked_SeqSBAIJ_Inlined" 240 PETSC_STATIC_INLINE PetscErrorCode MatSetValuesBlocked_SeqSBAIJ_Inlined(Mat A,PetscInt row,PetscInt col,const PetscScalar v[],InsertMode is,PetscInt orow,PetscInt ocol) 241 { 242 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 243 PetscErrorCode ierr; 244 PetscInt *rp,low,high,t,ii,jj,nrow,i,rmax,N; 245 PetscInt *imax =a->imax,*ai=a->i,*ailen=a->ilen; 246 PetscInt *aj =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs; 247 PetscBool roworiented=a->roworiented; 248 const PetscScalar *value = v; 249 MatScalar *ap,*aa = a->a,*bap; 250 251 PetscFunctionBegin; 252 if (col < row) { 253 if (a->ignore_ltriangular) PetscFunctionReturn(0); /* ignore lower triangular block */ 254 else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. 
Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)"); 255 } 256 rp = aj + ai[row]; 257 ap = aa + bs2*ai[row]; 258 rmax = imax[row]; 259 nrow = ailen[row]; 260 value = v; 261 low = 0; 262 high = nrow; 263 264 while (high-low > 7) { 265 t = (low+high)/2; 266 if (rp[t] > col) high = t; 267 else low = t; 268 } 269 for (i=low; i<high; i++) { 270 if (rp[i] > col) break; 271 if (rp[i] == col) { 272 bap = ap + bs2*i; 273 if (roworiented) { 274 if (is == ADD_VALUES) { 275 for (ii=0; ii<bs; ii++) { 276 for (jj=ii; jj<bs2; jj+=bs) { 277 bap[jj] += *value++; 278 } 279 } 280 } else { 281 for (ii=0; ii<bs; ii++) { 282 for (jj=ii; jj<bs2; jj+=bs) { 283 bap[jj] = *value++; 284 } 285 } 286 } 287 } else { 288 if (is == ADD_VALUES) { 289 for (ii=0; ii<bs; ii++) { 290 for (jj=0; jj<bs; jj++) { 291 *bap++ += *value++; 292 } 293 } 294 } else { 295 for (ii=0; ii<bs; ii++) { 296 for (jj=0; jj<bs; jj++) { 297 *bap++ = *value++; 298 } 299 } 300 } 301 } 302 goto noinsert2; 303 } 304 } 305 if (nonew == 1) goto noinsert2; 306 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new block index nonzero block (%D, %D) in the matrix", orow, ocol); 307 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 308 N = nrow++ - 1; high++; 309 /* shift up all the later entries in this row */ 310 for (ii=N; ii>=i; ii--) { 311 rp[ii+1] = rp[ii]; 312 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 313 } 314 if (N >= i) { 315 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 316 } 317 rp[i] = col; 318 bap = ap + bs2*i; 319 if (roworiented) { 320 for (ii=0; ii<bs; ii++) { 321 for (jj=ii; jj<bs2; jj+=bs) { 322 bap[jj] = *value++; 323 } 324 } 325 } else { 326 for (ii=0; ii<bs; ii++) { 327 for (jj=0; jj<bs; jj++) { 328 *bap++ = *value++; 329 } 330 } 331 } 332 noinsert2:; 333 ailen[row] = nrow; 334 PetscFunctionReturn(0); 335 } 336 337 #undef __FUNCT__ 338 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ_Inlined" 339 /* 340 This routine is exactly duplicated in mpibaij.c 341 */ 342 PETSC_STATIC_INLINE PetscErrorCode MatSetValuesBlocked_SeqBAIJ_Inlined(Mat A,PetscInt row,PetscInt col,const PetscScalar v[],InsertMode is,PetscInt orow,PetscInt ocol) 343 { 344 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 345 PetscInt *rp,low,high,t,ii,jj,nrow,i,rmax,N; 346 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 347 PetscErrorCode ierr; 348 PetscInt *aj =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs; 349 PetscBool roworiented=a->roworiented; 350 const PetscScalar *value = v; 351 MatScalar *ap,*aa = a->a,*bap; 352 353 PetscFunctionBegin; 354 rp = aj + ai[row]; 355 ap = aa + bs2*ai[row]; 356 rmax = imax[row]; 357 nrow = ailen[row]; 358 low = 0; 359 high = nrow; 360 value = v; 361 while (high-low > 7) { 362 t = (low+high)/2; 363 if (rp[t] > col) high = t; 364 else low = t; 365 } 366 for (i=low; i<high; i++) { 367 if (rp[i] > col) break; 368 if (rp[i] == col) { 369 bap = ap + bs2*i; 370 if (roworiented) { 371 if (is == ADD_VALUES) { 372 for (ii=0; ii<bs; ii++) { 373 for (jj=ii; jj<bs2; jj+=bs) { 374 bap[jj] += *value++; 375 } 376 } 377 } else { 378 for (ii=0; ii<bs; ii++) { 379 for (jj=ii; jj<bs2; jj+=bs) { 380 bap[jj] = *value++; 381 } 382 } 383 } 384 } else { 385 if (is == ADD_VALUES) { 386 for (ii=0; ii<bs; ii++,value+=bs) { 387 for (jj=0; jj<bs; jj++) { 388 bap[jj] += value[jj]; 389 } 390 bap += bs; 391 } 392 } else { 393 for (ii=0; ii<bs; ii++,value+=bs) { 394 for 
(jj=0; jj<bs; jj++) { 395 bap[jj] = value[jj]; 396 } 397 bap += bs; 398 } 399 } 400 } 401 goto noinsert2; 402 } 403 } 404 if (nonew == 1) goto noinsert2; 405 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new global block indexed nonzero block (%D, %D) in the matrix", orow, ocol); 406 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 407 N = nrow++ - 1; high++; 408 /* shift up all the later entries in this row */ 409 for (ii=N; ii>=i; ii--) { 410 rp[ii+1] = rp[ii]; 411 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 412 } 413 if (N >= i) { 414 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 415 } 416 rp[i] = col; 417 bap = ap + bs2*i; 418 if (roworiented) { 419 for (ii=0; ii<bs; ii++) { 420 for (jj=ii; jj<bs2; jj+=bs) { 421 bap[jj] = *value++; 422 } 423 } 424 } else { 425 for (ii=0; ii<bs; ii++) { 426 for (jj=0; jj<bs; jj++) { 427 *bap++ = *value++; 428 } 429 } 430 } 431 noinsert2:; 432 ailen[row] = nrow; 433 PetscFunctionReturn(0); 434 } 435 436 #undef __FUNCT__ 437 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ" 438 /* 439 This routine could be optimized by removing the need for the block copy below and passing stride information 440 to the above inline routines; similarly in MatSetValuesBlocked_MPIBAIJ() 441 */ 442 PetscErrorCode MatSetValuesBlocked_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv) 443 { 444 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 445 const MatScalar *value; 446 MatScalar *barray =baij->barray; 447 PetscBool roworiented = baij->roworiented,ignore_ltriangular = ((Mat_SeqSBAIJ*)baij->A->data)->ignore_ltriangular; 448 PetscErrorCode ierr; 449 PetscInt i,j,ii,jj,row,col,rstart=baij->rstartbs; 450 PetscInt rend=baij->rendbs,cstart=baij->rstartbs,stepval; 451 PetscInt cend=baij->rendbs,bs=mat->rmap->bs,bs2=baij->bs2; 452 453 PetscFunctionBegin; 454 if (!barray) { 455 ierr = PetscMalloc1(bs2,&barray);CHKERRQ(ierr); 456 baij->barray = barray; 457 } 458 459 if (roworiented) { 460 stepval = (n-1)*bs; 461 } else { 462 stepval = (m-1)*bs; 463 } 464 for (i=0; i<m; i++) { 465 if (im[i] < 0) continue; 466 #if defined(PETSC_USE_DEBUG) 467 if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block indexed row too large %D max %D",im[i],baij->Mbs-1); 468 #endif 469 if (im[i] >= rstart && im[i] < rend) { 470 row = im[i] - rstart; 471 for (j=0; j<n; j++) { 472 if (im[i] > in[j]) { 473 if (ignore_ltriangular) continue; /* ignore lower triangular blocks */ 474 else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. 
Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)"); 475 } 476 /* If NumCol = 1 then a copy is not required */ 477 if ((roworiented) && (n == 1)) { 478 barray = (MatScalar*) v + i*bs2; 479 } else if ((!roworiented) && (m == 1)) { 480 barray = (MatScalar*) v + j*bs2; 481 } else { /* Here a copy is required */ 482 if (roworiented) { 483 value = v + i*(stepval+bs)*bs + j*bs; 484 } else { 485 value = v + j*(stepval+bs)*bs + i*bs; 486 } 487 for (ii=0; ii<bs; ii++,value+=stepval) { 488 for (jj=0; jj<bs; jj++) { 489 *barray++ = *value++; 490 } 491 } 492 barray -=bs2; 493 } 494 495 if (in[j] >= cstart && in[j] < cend) { 496 col = in[j] - cstart; 497 ierr = MatSetValuesBlocked_SeqSBAIJ_Inlined(baij->A,row,col,barray,addv,im[i],in[j]);CHKERRQ(ierr); 498 } else if (in[j] < 0) continue; 499 #if defined(PETSC_USE_DEBUG) 500 else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block indexed column too large %D max %D",in[j],baij->Nbs-1); 501 #endif 502 else { 503 if (mat->was_assembled) { 504 if (!baij->colmap) { 505 ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 506 } 507 508 #if defined(PETSC_USE_DEBUG) 509 #if defined(PETSC_USE_CTABLE) 510 { PetscInt data; 511 ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr); 512 if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 513 } 514 #else 515 if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 516 #endif 517 #endif 518 #if defined(PETSC_USE_CTABLE) 519 ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr); 520 col = (col - 1)/bs; 521 #else 522 col = (baij->colmap[in[j]] - 1)/bs; 523 #endif 524 if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 525 ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr); 526 col = in[j]; 527 } 528 } else col = in[j]; 529 ierr = MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B,row,col,barray,addv,im[i],in[j]);CHKERRQ(ierr); 530 } 531 } 532 } else { 533 if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process block indexed row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 534 if (!baij->donotstash) { 535 if (roworiented) { 536 ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 537 } else { 538 ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 539 } 540 } 541 } 542 } 543 PetscFunctionReturn(0); 544 } 545 546 #undef __FUNCT__ 547 #define __FUNCT__ "MatGetValues_MPISBAIJ" 548 PetscErrorCode MatGetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 549 { 550 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 551 PetscErrorCode ierr; 552 PetscInt bs = mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend; 553 PetscInt bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data; 554 555 PetscFunctionBegin; 556 for (i=0; i<m; i++) { 557 if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); */ 558 if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1); 559 if (idxm[i] >= bsrstart && idxm[i] < bsrend) { 560 row = idxm[i] - bsrstart; 561 for (j=0; j<n; j++) { 562 if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column %D",idxn[j]); */ 563 if 
(idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
        if (idxn[j] >= bscstart && idxn[j] < bscend) {
          col  = idxn[j] - bscstart;
          ierr = MatGetValues_SeqSBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!baij->colmap) {
            ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined(PETSC_USE_CTABLE)
          ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
          data--;
#else
          data = baij->colmap[idxn[j]/bs]-1;
#endif
          if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
          else {
            col  = data + idxn[j]%bs;
            ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
  }
  PetscFunctionReturn(0);
}
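
/*
   A note on MatNorm_MPISBAIJ() below (a summary of the computation performed there, not new
   behaviour): only the upper triangle of the symmetric matrix is stored, so for NORM_FROBENIUS
   the local contributions are combined as

       ||mat||_F = sqrt( sum_p ||A_p||_F^2 + 2 * sum_p ||B_p||_F^2 )

   where A_p is the diagonal (symmetric) block owned by process p and B_p its off-diagonal
   block; the factor 2 accounts for the transposed copies of the off-diagonal entries that are
   not stored locally.
*/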
#undef __FUNCT__
#define __FUNCT__ "MatNorm_MPISBAIJ"
PetscErrorCode MatNorm_MPISBAIJ(Mat mat,NormType type,PetscReal *norm)
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscReal      sum[2],*lnorm2;

  PetscFunctionBegin;
  if (baij->size == 1) {
    ierr = MatNorm(baij->A,type,norm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      ierr    = PetscMalloc1(2,&lnorm2);CHKERRQ(ierr);
      ierr    = MatNorm(baij->A,type,lnorm2);CHKERRQ(ierr);
      *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2++;            /* square of norm(A) */
      ierr    = MatNorm(baij->B,type,lnorm2);CHKERRQ(ierr);
      *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2--;            /* square of norm(B) */
      ierr    = MPIU_Allreduce(lnorm2,sum,2,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      *norm   = PetscSqrtReal(sum[0] + 2*sum[1]);
      ierr    = PetscFree(lnorm2);CHKERRQ(ierr);
    } else if (type == NORM_INFINITY || type == NORM_1) { /* max row/column sum */
      Mat_SeqSBAIJ *amat=(Mat_SeqSBAIJ*)baij->A->data;
      Mat_SeqBAIJ  *bmat=(Mat_SeqBAIJ*)baij->B->data;
      PetscReal    *rsum,*rsum2,vabs;
      PetscInt     *jj,*garray=baij->garray,rstart=baij->rstartbs,nz;
      PetscInt     brow,bcol,col,bs=baij->A->rmap->bs,row,grow,gcol,mbs=amat->mbs;
      MatScalar    *v;

      ierr = PetscMalloc2(mat->cmap->N,&rsum,mat->cmap->N,&rsum2);CHKERRQ(ierr);
      ierr = PetscMemzero(rsum,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
      /* Amat */
      v = amat->a; jj = amat->j;
      for (brow=0; brow<mbs; brow++) {
        grow = bs*(rstart + brow);
        nz   = amat->i[brow+1] - amat->i[brow];
        for (bcol=0; bcol<nz; bcol++) {
          gcol = bs*(rstart + *jj); jj++;
          for (col=0; col<bs; col++) {
            for (row=0; row<bs; row++) {
              vabs            = PetscAbsScalar(*v); v++;
              rsum[gcol+col] += vabs;
              /* non-diagonal block */
              if (bcol > 0 && vabs > 0.0) rsum[grow+row] += vabs;
            }
          }
        }
        ierr = PetscLogFlops(nz*bs*bs);CHKERRQ(ierr);
      }
      /* Bmat */
      v = bmat->a; jj = bmat->j;
      for (brow=0; brow<mbs; brow++) {
        grow = bs*(rstart + brow);
        nz   = bmat->i[brow+1] - bmat->i[brow];
        for (bcol=0; bcol<nz; bcol++) {
          gcol = bs*garray[*jj]; jj++;
          for (col=0; col<bs; col++) {
            for (row=0; row<bs; row++) {
              vabs            = PetscAbsScalar(*v); v++;
              rsum[gcol+col] += vabs;
              rsum[grow+row] += vabs;
            }
          }
        }
        ierr = PetscLogFlops(nz*bs*bs);CHKERRQ(ierr);
      }
      ierr  = MPIU_Allreduce(rsum,rsum2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      *norm = 0.0;
      for (col=0; col<mat->cmap->N; col++) {
        if (rsum2[col] > *norm) *norm = rsum2[col];
      }
      ierr = PetscFree2(rsum,rsum2);CHKERRQ(ierr);
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for this norm yet");
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPISBAIJ"
PetscErrorCode MatAssemblyBegin_MPISBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;

  PetscFunctionBegin;
  if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);

  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
  ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(mat,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr); /* query the block stash, whose info is reported next */
  ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
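
/*
   A minimal usage sketch (illustrative only; this helper is hypothetical, compiled out, and
   not called anywhere in this file): how user code typically drives the stash-based assembly
   implemented by MatAssemblyBegin_MPISBAIJ() above and MatAssemblyEnd_MPISBAIJ() below.
   The matrix size, block size and values are placeholder assumptions.
*/
#if 0
static PetscErrorCode ExampleAssembleMPISBAIJ(MPI_Comm comm)
{
  Mat            A;
  PetscErrorCode ierr;
  PetscInt       i,j,Istart,Iend,N = 8;
  PetscScalar    one = 1.0;

  PetscFunctionBegin;
  ierr = MatCreate(comm,&A);CHKERRQ(ierr);
  ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,N,N);CHKERRQ(ierr);
  ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
  ierr = MatMPISBAIJSetPreallocation(A,2,2,NULL,1,NULL);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(A,&Istart,&Iend);CHKERRQ(ierr);
  for (i=Istart; i<Iend; i++) {
    for (j=i; j<N; j++) {                                        /* only the upper triangle is inserted */
      ierr = MatSetValues(A,1,&i,1,&j,&one,INSERT_VALUES);CHKERRQ(ierr);
    }
  }
  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);   /* ships stashed off-process entries */
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatDestroy(&A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif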
#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPISBAIJ"
PetscErrorCode MatAssemblyEnd_MPISBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPISBAIJ   *baij=(Mat_MPISBAIJ*)mat->data;
  Mat_SeqSBAIJ   *a   =(Mat_SeqSBAIJ*)baij->A->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
  PetscInt       *row,*col;
  PetscBool      other_disassembled;
  PetscMPIInt    n;
  PetscBool      r1,r2,r3;
  MatScalar      *val;

  /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
  PetscFunctionBegin;
  if (!baij->donotstash && !mat->nooffprocentries) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);
        i    = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
    /* Now process the block-stash. Since the values are stashed column-oriented,
       set the roworiented flag to column-oriented, and after MatSetValues()
       restore the original flags */
    r1 = baij->roworiented;
    r2 = a->roworiented;
    r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented;

    baij->roworiented = PETSC_FALSE;
    a->roworiented    = PETSC_FALSE;

    ((Mat_SeqBAIJ*)baij->B->data)->roworiented = PETSC_FALSE; /* b->roworiented */
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        ierr = MatSetValuesBlocked_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,mat->insertmode);CHKERRQ(ierr);
        i    = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);

    baij->roworiented = r1;
    a->roworiented    = r2;

    ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworiented */
  }

  ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) {
    ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
    }
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPISBAIJ(mat);CHKERRQ(ierr); /* setup Mvctx and sMvctx */
  }
  ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);

  ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);

  baij->rowvalues = 0;

  /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
  if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
    PetscObjectState state = baij->A->nonzerostate + baij->B->nonzerostate;
    ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

extern PetscErrorCode MatView_SeqSBAIJ(Mat,PetscViewer);
extern PetscErrorCode MatSetValues_MPIBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
#include <petscdraw.h>
#undef __FUNCT__
#define __FUNCT__ "MatView_MPISBAIJ_ASCIIorDraworSocket"
static PetscErrorCode MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPISBAIJ      *baij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode    ierr;
  PetscInt          bs   = mat->rmap->bs;
  PetscMPIInt       rank = baij->rank;
  PetscBool         iascii,isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  if (iascii) {
    ierr = 
PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 808 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 809 MatInfo info; 810 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 811 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 812 ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr); 813 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,mat->rmap->bs,(PetscInt)info.memory);CHKERRQ(ierr); 814 ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 815 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 816 ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 817 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 818 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 819 ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr); 820 ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 821 ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr); 822 PetscFunctionReturn(0); 823 } else if (format == PETSC_VIEWER_ASCII_INFO) { 824 ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr); 825 PetscFunctionReturn(0); 826 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 827 PetscFunctionReturn(0); 828 } 829 } 830 831 if (isdraw) { 832 PetscDraw draw; 833 PetscBool isnull; 834 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 835 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); 836 if (isnull) PetscFunctionReturn(0); 837 } 838 839 { 840 /* assemble the entire matrix onto first processor. */ 841 Mat A; 842 Mat_SeqSBAIJ *Aloc; 843 Mat_SeqBAIJ *Bloc; 844 PetscInt M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs; 845 MatScalar *a; 846 const char *matname; 847 848 /* Should this be the same type as mat? 
*/ 849 ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr); 850 if (!rank) { 851 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 852 } else { 853 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 854 } 855 ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr); 856 ierr = MatMPISBAIJSetPreallocation(A,mat->rmap->bs,0,NULL,0,NULL);CHKERRQ(ierr); 857 ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 858 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr); 859 860 /* copy over the A part */ 861 Aloc = (Mat_SeqSBAIJ*)baij->A->data; 862 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 863 ierr = PetscMalloc1(bs,&rvals);CHKERRQ(ierr); 864 865 for (i=0; i<mbs; i++) { 866 rvals[0] = bs*(baij->rstartbs + i); 867 for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1; 868 for (j=ai[i]; j<ai[i+1]; j++) { 869 col = (baij->cstartbs+aj[j])*bs; 870 for (k=0; k<bs; k++) { 871 ierr = MatSetValues_MPISBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 872 col++; 873 a += bs; 874 } 875 } 876 } 877 /* copy over the B part */ 878 Bloc = (Mat_SeqBAIJ*)baij->B->data; 879 ai = Bloc->i; aj = Bloc->j; a = Bloc->a; 880 for (i=0; i<mbs; i++) { 881 882 rvals[0] = bs*(baij->rstartbs + i); 883 for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1; 884 for (j=ai[i]; j<ai[i+1]; j++) { 885 col = baij->garray[aj[j]]*bs; 886 for (k=0; k<bs; k++) { 887 ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 888 col++; 889 a += bs; 890 } 891 } 892 } 893 ierr = PetscFree(rvals);CHKERRQ(ierr); 894 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 895 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 896 /* 897 Everyone has to call to draw the matrix since the graphics waits are 898 synchronized across all processors that share the PetscDraw object 899 */ 900 ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr); 901 ierr = PetscObjectGetName((PetscObject)mat,&matname);CHKERRQ(ierr); 902 if (!rank) { 903 ierr = PetscObjectSetName((PetscObject)((Mat_MPISBAIJ*)(A->data))->A,matname);CHKERRQ(ierr); 904 ierr = MatView_SeqSBAIJ(((Mat_MPISBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 905 } 906 ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr); 907 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 908 ierr = MatDestroy(&A);CHKERRQ(ierr); 909 } 910 PetscFunctionReturn(0); 911 } 912 913 #undef __FUNCT__ 914 #define __FUNCT__ "MatView_MPISBAIJ" 915 PetscErrorCode MatView_MPISBAIJ(Mat mat,PetscViewer viewer) 916 { 917 PetscErrorCode ierr; 918 PetscBool iascii,isdraw,issocket,isbinary; 919 920 PetscFunctionBegin; 921 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 922 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 923 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 924 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 925 if (iascii || isdraw || issocket || isbinary) { 926 ierr = MatView_MPISBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 927 } 928 PetscFunctionReturn(0); 929 } 930 931 #undef __FUNCT__ 932 #define __FUNCT__ "MatDestroy_MPISBAIJ" 933 PetscErrorCode MatDestroy_MPISBAIJ(Mat mat) 934 { 935 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 936 PetscErrorCode ierr; 937 938 PetscFunctionBegin; 939 #if defined(PETSC_USE_LOG) 940 PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N); 941 #endif 942 ierr = 
MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
  ierr = MatDestroy(&baij->A);CHKERRQ(ierr);
  ierr = MatDestroy(&baij->B);CHKERRQ(ierr);
#if defined(PETSC_USE_CTABLE)
  ierr = PetscTableDestroy(&baij->colmap);CHKERRQ(ierr);
#else
  ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
#endif
  ierr = PetscFree(baij->garray);CHKERRQ(ierr);
  ierr = VecDestroy(&baij->lvec);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&baij->Mvctx);CHKERRQ(ierr);
  ierr = VecDestroy(&baij->slvec0);CHKERRQ(ierr);
  ierr = VecDestroy(&baij->slvec0b);CHKERRQ(ierr);
  ierr = VecDestroy(&baij->slvec1);CHKERRQ(ierr);
  ierr = VecDestroy(&baij->slvec1a);CHKERRQ(ierr);
  ierr = VecDestroy(&baij->slvec1b);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&baij->sMvctx);CHKERRQ(ierr);
  ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
  ierr = PetscFree(baij->barray);CHKERRQ(ierr);
  ierr = PetscFree(baij->hd);CHKERRQ(ierr);
  ierr = VecDestroy(&baij->diag);CHKERRQ(ierr);
  ierr = VecDestroy(&baij->bb1);CHKERRQ(ierr);
  ierr = VecDestroy(&baij->xx1);CHKERRQ(ierr);
#if defined(PETSC_USE_REAL_MAT_SINGLE)
  ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr);
#endif
  ierr = PetscFree(baij->in_loc);CHKERRQ(ierr);
  ierr = PetscFree(baij->v_loc);CHKERRQ(ierr);
  ierr = PetscFree(baij->rangebs);CHKERRQ(ierr);
  ierr = PetscFree(mat->data);CHKERRQ(ierr);

  ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPISBAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpisbaij_mpisbstrm_C",NULL);CHKERRQ(ierr);
#if defined(PETSC_HAVE_ELEMENTAL)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpisbaij_elemental_C",NULL);CHKERRQ(ierr);
#endif
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMult_MPISBAIJ_Hermitian"
PetscErrorCode MatMult_MPISBAIJ_Hermitian(Mat A,Vec xx,Vec yy)
{
  Mat_MPISBAIJ      *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode    ierr;
  PetscInt          nt,mbs=a->mbs,bs=A->rmap->bs;
  PetscScalar       *from;
  const PetscScalar *x;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");

  /* diagonal part */
  ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr);
  ierr = VecSet(a->slvec1b,0.0);CHKERRQ(ierr);

  /* subdiagonal part */
  ierr = (*a->B->ops->multhermitiantranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);

  /* copy x into the vec slvec0 */
  ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
  ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);

  ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
  ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
  ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);

  ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  /* superdiagonal part */
  ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
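
/*
   Outline of the parallel product used by MatMult_MPISBAIJ() below (and by the Hermitian
   variant above), as read from the code, in terms of the work vectors
   slvec0 = [x_loc ; B^T x_loc] and slvec1 = [slvec1a ; slvec1b]:

     1. slvec1a = A_loc * x_loc      (symmetric multiply with the stored upper triangle)
        slvec1b = 0
     2. slvec0b = B^T * x_loc        (this process's contribution to rows owned elsewhere)
     3. scatter-add slvec0 into slvec1 via sMvctx, which both delivers the B^T contributions
        to their owning processes (into slvec1a) and gathers the needed ghost values of x
        (into slvec1b)
     4. yy = B * slvec1b + slvec1a
*/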
#undef __FUNCT__
#define __FUNCT__ "MatMult_MPISBAIJ"
PetscErrorCode MatMult_MPISBAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPISBAIJ      *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode    ierr;
  PetscInt          nt,mbs=a->mbs,bs=A->rmap->bs;
  PetscScalar       *from;
  const PetscScalar *x;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");

  /* diagonal part */
  ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr);
  ierr = VecSet(a->slvec1b,0.0);CHKERRQ(ierr);

  /* subdiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);

  /* copy x into the vec slvec0 */
  ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
  ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);

  ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
  ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
  ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);

  ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  /* superdiagonal part */
  ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMult_MPISBAIJ_2comm"
PetscErrorCode MatMult_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");

  ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
  if (nt != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and yy");

  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  /* do diagonal part */
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  /* do superdiagonal part */
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  /* do subdiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPISBAIJ"
PetscErrorCode MatMultAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPISBAIJ      *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode    ierr;
  PetscInt          mbs=a->mbs,bs=A->rmap->bs;
  PetscScalar       *from,zero=0.0;
  const PetscScalar *x;

  PetscFunctionBegin;
  /*
  PetscSynchronizedPrintf(PetscObjectComm((PetscObject)A)," MatMultAdd is called ...\n");
  PetscSynchronizedFlush(PetscObjectComm((PetscObject)A),PETSC_STDOUT);
  */
  /* diagonal part */
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,a->slvec1a);CHKERRQ(ierr);
  ierr = VecSet(a->slvec1b,zero);CHKERRQ(ierr);

  /* subdiagonal part */
  ierr 
= (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr); 1106 1107 /* copy x into the vec slvec0 */ 1108 ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr); 1109 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1110 ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 1111 ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr); 1112 1113 ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1114 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1115 ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1116 1117 /* supperdiagonal part */ 1118 ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,zz);CHKERRQ(ierr); 1119 PetscFunctionReturn(0); 1120 } 1121 1122 #undef __FUNCT__ 1123 #define __FUNCT__ "MatMultAdd_MPISBAIJ_2comm" 1124 PetscErrorCode MatMultAdd_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy,Vec zz) 1125 { 1126 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1127 PetscErrorCode ierr; 1128 1129 PetscFunctionBegin; 1130 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1131 /* do diagonal part */ 1132 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1133 /* do supperdiagonal part */ 1134 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1135 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 1136 1137 /* do subdiagonal part */ 1138 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1139 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1140 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1141 PetscFunctionReturn(0); 1142 } 1143 1144 /* 1145 This only works correctly for square matrices where the subblock A->A is the 1146 diagonal block 1147 */ 1148 #undef __FUNCT__ 1149 #define __FUNCT__ "MatGetDiagonal_MPISBAIJ" 1150 PetscErrorCode MatGetDiagonal_MPISBAIJ(Mat A,Vec v) 1151 { 1152 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1153 PetscErrorCode ierr; 1154 1155 PetscFunctionBegin; 1156 /* if (a->rmap->N != a->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */ 1157 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 1158 PetscFunctionReturn(0); 1159 } 1160 1161 #undef __FUNCT__ 1162 #define __FUNCT__ "MatScale_MPISBAIJ" 1163 PetscErrorCode MatScale_MPISBAIJ(Mat A,PetscScalar aa) 1164 { 1165 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1166 PetscErrorCode ierr; 1167 1168 PetscFunctionBegin; 1169 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 1170 ierr = MatScale(a->B,aa);CHKERRQ(ierr); 1171 PetscFunctionReturn(0); 1172 } 1173 1174 #undef __FUNCT__ 1175 #define __FUNCT__ "MatGetRow_MPISBAIJ" 1176 PetscErrorCode MatGetRow_MPISBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1177 { 1178 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data; 1179 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1180 PetscErrorCode ierr; 1181 PetscInt bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB; 1182 PetscInt nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend; 1183 PetscInt *cmap,*idx_p,cstart = mat->rstartbs; 1184 1185 PetscFunctionBegin; 1186 if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1187 mat->getrowactive = PETSC_TRUE; 1188 1189 if (!mat->rowvalues && (idx || v)) { 1190 /* 1191 allocate enough space to hold information from the longest row. 
1192 */ 1193 Mat_SeqSBAIJ *Aa = (Mat_SeqSBAIJ*)mat->A->data; 1194 Mat_SeqBAIJ *Ba = (Mat_SeqBAIJ*)mat->B->data; 1195 PetscInt max = 1,mbs = mat->mbs,tmp; 1196 for (i=0; i<mbs; i++) { 1197 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; /* row length */ 1198 if (max < tmp) max = tmp; 1199 } 1200 ierr = PetscMalloc2(max*bs2,&mat->rowvalues,max*bs2,&mat->rowindices);CHKERRQ(ierr); 1201 } 1202 1203 if (row < brstart || row >= brend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows"); 1204 lrow = row - brstart; /* local row index */ 1205 1206 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1207 if (!v) {pvA = 0; pvB = 0;} 1208 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1209 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1210 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1211 nztot = nzA + nzB; 1212 1213 cmap = mat->garray; 1214 if (v || idx) { 1215 if (nztot) { 1216 /* Sort by increasing column numbers, assuming A and B already sorted */ 1217 PetscInt imark = -1; 1218 if (v) { 1219 *v = v_p = mat->rowvalues; 1220 for (i=0; i<nzB; i++) { 1221 if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i]; 1222 else break; 1223 } 1224 imark = i; 1225 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1226 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1227 } 1228 if (idx) { 1229 *idx = idx_p = mat->rowindices; 1230 if (imark > -1) { 1231 for (i=0; i<imark; i++) { 1232 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs; 1233 } 1234 } else { 1235 for (i=0; i<nzB; i++) { 1236 if (cmap[cworkB[i]/bs] < cstart) idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs; 1237 else break; 1238 } 1239 imark = i; 1240 } 1241 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart*bs + cworkA[i]; 1242 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1243 } 1244 } else { 1245 if (idx) *idx = 0; 1246 if (v) *v = 0; 1247 } 1248 } 1249 *nz = nztot; 1250 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1251 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1252 PetscFunctionReturn(0); 1253 } 1254 1255 #undef __FUNCT__ 1256 #define __FUNCT__ "MatRestoreRow_MPISBAIJ" 1257 PetscErrorCode MatRestoreRow_MPISBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1258 { 1259 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 1260 1261 PetscFunctionBegin; 1262 if (!baij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1263 baij->getrowactive = PETSC_FALSE; 1264 PetscFunctionReturn(0); 1265 } 1266 1267 #undef __FUNCT__ 1268 #define __FUNCT__ "MatGetRowUpperTriangular_MPISBAIJ" 1269 PetscErrorCode MatGetRowUpperTriangular_MPISBAIJ(Mat A) 1270 { 1271 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1272 Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data; 1273 1274 PetscFunctionBegin; 1275 aA->getrow_utriangular = PETSC_TRUE; 1276 PetscFunctionReturn(0); 1277 } 1278 #undef __FUNCT__ 1279 #define __FUNCT__ "MatRestoreRowUpperTriangular_MPISBAIJ" 1280 PetscErrorCode MatRestoreRowUpperTriangular_MPISBAIJ(Mat A) 1281 { 1282 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1283 Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data; 1284 1285 PetscFunctionBegin; 1286 aA->getrow_utriangular = PETSC_FALSE; 1287 PetscFunctionReturn(0); 1288 } 1289 1290 #undef __FUNCT__ 1291 #define __FUNCT__ "MatRealPart_MPISBAIJ" 1292 PetscErrorCode MatRealPart_MPISBAIJ(Mat A) 1293 { 1294 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1295 PetscErrorCode ierr; 1296 1297 PetscFunctionBegin; 1298 ierr = 
MatRealPart(a->A);CHKERRQ(ierr); 1299 ierr = MatRealPart(a->B);CHKERRQ(ierr); 1300 PetscFunctionReturn(0); 1301 } 1302 1303 #undef __FUNCT__ 1304 #define __FUNCT__ "MatImaginaryPart_MPISBAIJ" 1305 PetscErrorCode MatImaginaryPart_MPISBAIJ(Mat A) 1306 { 1307 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1308 PetscErrorCode ierr; 1309 1310 PetscFunctionBegin; 1311 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 1312 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 1313 PetscFunctionReturn(0); 1314 } 1315 1316 /* Check if isrow is a subset of iscol_local, called by MatGetSubMatrix_MPISBAIJ() 1317 Input: isrow - distributed(parallel), 1318 iscol_local - locally owned (seq) 1319 */ 1320 #undef __FUNCT__ 1321 #define __FUNCT__ "ISEqual_private" 1322 PetscErrorCode ISEqual_private(IS isrow,IS iscol_local,PetscBool *flg) 1323 { 1324 PetscErrorCode ierr; 1325 PetscInt sz1,sz2,*a1,*a2,i,j,k,nmatch; 1326 const PetscInt *ptr1,*ptr2; 1327 1328 PetscFunctionBegin; 1329 ierr = ISGetLocalSize(isrow,&sz1);CHKERRQ(ierr); 1330 ierr = ISGetLocalSize(iscol_local,&sz2);CHKERRQ(ierr); 1331 if (sz1 > sz2) { 1332 *flg = PETSC_FALSE; 1333 PetscFunctionReturn(0); 1334 } 1335 1336 ierr = ISGetIndices(isrow,&ptr1);CHKERRQ(ierr); 1337 ierr = ISGetIndices(iscol_local,&ptr2);CHKERRQ(ierr); 1338 1339 ierr = PetscMalloc1(sz1,&a1);CHKERRQ(ierr); 1340 ierr = PetscMalloc1(sz2,&a2);CHKERRQ(ierr); 1341 ierr = PetscMemcpy(a1,ptr1,sz1*sizeof(PetscInt));CHKERRQ(ierr); 1342 ierr = PetscMemcpy(a2,ptr2,sz2*sizeof(PetscInt));CHKERRQ(ierr); 1343 ierr = PetscSortInt(sz1,a1);CHKERRQ(ierr); 1344 ierr = PetscSortInt(sz2,a2);CHKERRQ(ierr); 1345 1346 nmatch=0; 1347 k = 0; 1348 for (i=0; i<sz1; i++){ 1349 for (j=k; j<sz2; j++){ 1350 if (a1[i] == a2[j]) { 1351 k = j; nmatch++; 1352 break; 1353 } 1354 } 1355 } 1356 ierr = ISRestoreIndices(isrow,&ptr1);CHKERRQ(ierr); 1357 ierr = ISRestoreIndices(iscol_local,&ptr2);CHKERRQ(ierr); 1358 ierr = PetscFree(a1);CHKERRQ(ierr); 1359 ierr = PetscFree(a2);CHKERRQ(ierr); 1360 if (nmatch < sz1) { 1361 *flg = PETSC_FALSE; 1362 } else { 1363 *flg = PETSC_TRUE; 1364 } 1365 PetscFunctionReturn(0); 1366 } 1367 1368 #undef __FUNCT__ 1369 #define __FUNCT__ "MatGetSubMatrix_MPISBAIJ" 1370 PetscErrorCode MatGetSubMatrix_MPISBAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 1371 { 1372 PetscErrorCode ierr; 1373 IS iscol_local; 1374 PetscInt csize; 1375 PetscBool isequal; 1376 1377 PetscFunctionBegin; 1378 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 1379 if (call == MAT_REUSE_MATRIX) { 1380 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 1381 if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 1382 } else { 1383 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 1384 ierr = ISEqual_private(isrow,iscol_local,&isequal);CHKERRQ(ierr); 1385 if (!isequal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"For symmetric format, iscol must equal isrow"); 1386 } 1387 1388 /* now call MatGetSubMatrix_MPIBAIJ() */ 1389 ierr = MatGetSubMatrix_MPIBAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 1390 if (call == MAT_INITIAL_MATRIX) { 1391 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 1392 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 1393 } 1394 PetscFunctionReturn(0); 1395 } 1396 1397 #undef __FUNCT__ 1398 #define __FUNCT__ "MatZeroEntries_MPISBAIJ" 1399 PetscErrorCode MatZeroEntries_MPISBAIJ(Mat A) 1400 { 1401 Mat_MPISBAIJ *l = 
(Mat_MPISBAIJ*)A->data; 1402 PetscErrorCode ierr; 1403 1404 PetscFunctionBegin; 1405 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 1406 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 1407 PetscFunctionReturn(0); 1408 } 1409 1410 #undef __FUNCT__ 1411 #define __FUNCT__ "MatGetInfo_MPISBAIJ" 1412 PetscErrorCode MatGetInfo_MPISBAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1413 { 1414 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)matin->data; 1415 Mat A = a->A,B = a->B; 1416 PetscErrorCode ierr; 1417 PetscReal isend[5],irecv[5]; 1418 1419 PetscFunctionBegin; 1420 info->block_size = (PetscReal)matin->rmap->bs; 1421 1422 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1423 1424 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1425 isend[3] = info->memory; isend[4] = info->mallocs; 1426 1427 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1428 1429 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1430 isend[3] += info->memory; isend[4] += info->mallocs; 1431 if (flag == MAT_LOCAL) { 1432 info->nz_used = isend[0]; 1433 info->nz_allocated = isend[1]; 1434 info->nz_unneeded = isend[2]; 1435 info->memory = isend[3]; 1436 info->mallocs = isend[4]; 1437 } else if (flag == MAT_GLOBAL_MAX) { 1438 ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1439 1440 info->nz_used = irecv[0]; 1441 info->nz_allocated = irecv[1]; 1442 info->nz_unneeded = irecv[2]; 1443 info->memory = irecv[3]; 1444 info->mallocs = irecv[4]; 1445 } else if (flag == MAT_GLOBAL_SUM) { 1446 ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1447 1448 info->nz_used = irecv[0]; 1449 info->nz_allocated = irecv[1]; 1450 info->nz_unneeded = irecv[2]; 1451 info->memory = irecv[3]; 1452 info->mallocs = irecv[4]; 1453 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag); 1454 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1455 info->fill_ratio_needed = 0; 1456 info->factor_mallocs = 0; 1457 PetscFunctionReturn(0); 1458 } 1459 1460 #undef __FUNCT__ 1461 #define __FUNCT__ "MatSetOption_MPISBAIJ" 1462 PetscErrorCode MatSetOption_MPISBAIJ(Mat A,MatOption op,PetscBool flg) 1463 { 1464 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1465 Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data; 1466 PetscErrorCode ierr; 1467 1468 PetscFunctionBegin; 1469 switch (op) { 1470 case MAT_NEW_NONZERO_LOCATIONS: 1471 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1472 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1473 case MAT_KEEP_NONZERO_PATTERN: 1474 case MAT_NEW_NONZERO_LOCATION_ERR: 1475 MatCheckPreallocated(A,1); 1476 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1477 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1478 break; 1479 case MAT_ROW_ORIENTED: 1480 MatCheckPreallocated(A,1); 1481 a->roworiented = flg; 1482 1483 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1484 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1485 break; 1486 case MAT_NEW_DIAGONALS: 1487 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1488 break; 1489 case MAT_IGNORE_OFF_PROC_ENTRIES: 1490 a->donotstash = flg; 1491 break; 1492 case MAT_USE_HASH_TABLE: 1493 a->ht_flag = flg; 1494 break; 1495 case MAT_HERMITIAN: 1496 MatCheckPreallocated(A,1); 1497 if (!A->assembled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call MatAssemblyEnd() first"); 1498 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1499 1500 A->ops->mult = 
MatMult_MPISBAIJ_Hermitian;
    break;
  case MAT_SPD:
    A->spd_set = PETSC_TRUE;
    A->spd     = flg;
    if (flg) {
      A->symmetric                  = PETSC_TRUE;
      A->structurally_symmetric     = PETSC_TRUE;
      A->symmetric_set              = PETSC_TRUE;
      A->structurally_symmetric_set = PETSC_TRUE;
    }
    break;
  case MAT_SYMMETRIC:
    MatCheckPreallocated(A,1);
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  case MAT_STRUCTURALLY_SYMMETRIC:
    MatCheckPreallocated(A,1);
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  case MAT_SYMMETRY_ETERNAL:
    if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Matrix must be symmetric");
    ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_LOWER_TRIANGULAR:
    aA->ignore_ltriangular = flg;
    break;
  case MAT_ERROR_LOWER_TRIANGULAR:
    aA->ignore_ltriangular = flg;
    break;
  case MAT_GETROW_UPPERTRIANGULAR:
    aA->getrow_utriangular = flg;
    break;
  default:
    SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatTranspose_MPISBAIJ"
PetscErrorCode MatTranspose_MPISBAIJ(Mat A,MatReuse reuse,Mat *B)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* the matrix is symmetric, so its transpose is simply a copy of itself */
  if (reuse == MAT_INITIAL_MATRIX || *B != A) {
    ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDiagonalScale_MPISBAIJ"
PetscErrorCode MatDiagonalScale_MPISBAIJ(Mat mat,Vec ll,Vec rr)
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  Mat            a     = baij->A, b=baij->B;
  PetscErrorCode ierr;
  PetscInt       nv,m,n;
  PetscBool      flg;

  PetscFunctionBegin;
  if (ll != rr) {
    ierr = VecEqual(ll,rr,&flg);CHKERRQ(ierr);
    if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be same\n");
  }
  if (!ll) PetscFunctionReturn(0);

  ierr = MatGetLocalSize(mat,&m,&n);CHKERRQ(ierr);
  if (m != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"For symmetric format, local size %d %d must be same",m,n);

  ierr = VecGetLocalSize(rr,&nv);CHKERRQ(ierr);
  if (nv!=n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left and right vector non-conforming local size");

  ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

  /* left diagonalscale the off-diagonal part */
  ierr = (*b->ops->diagonalscale)(b,ll,NULL);CHKERRQ(ierr);

  /* scale the diagonal part */
  ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);

  /* right diagonalscale the off-diagonal part */
  ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*b->ops->diagonalscale)(b,NULL,baij->lvec);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetUnfactored_MPISBAIJ"
PetscErrorCode MatSetUnfactored_MPISBAIJ(Mat A)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDuplicate_MPISBAIJ(Mat,MatDuplicateOption,Mat*);

#undef __FUNCT__
#define __FUNCT__ "MatEqual_MPISBAIJ"
PetscErrorCode MatEqual_MPISBAIJ(Mat A,Mat 
B,PetscBool *flag) 1606 { 1607 Mat_MPISBAIJ *matB = (Mat_MPISBAIJ*)B->data,*matA = (Mat_MPISBAIJ*)A->data; 1608 Mat a,b,c,d; 1609 PetscBool flg; 1610 PetscErrorCode ierr; 1611 1612 PetscFunctionBegin; 1613 a = matA->A; b = matA->B; 1614 c = matB->A; d = matB->B; 1615 1616 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1617 if (flg) { 1618 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1619 } 1620 ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 1621 PetscFunctionReturn(0); 1622 } 1623 1624 #undef __FUNCT__ 1625 #define __FUNCT__ "MatCopy_MPISBAIJ" 1626 PetscErrorCode MatCopy_MPISBAIJ(Mat A,Mat B,MatStructure str) 1627 { 1628 PetscErrorCode ierr; 1629 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1630 Mat_MPISBAIJ *b = (Mat_MPISBAIJ*)B->data; 1631 1632 PetscFunctionBegin; 1633 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 1634 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 1635 ierr = MatGetRowUpperTriangular(A);CHKERRQ(ierr); 1636 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1637 ierr = MatRestoreRowUpperTriangular(A);CHKERRQ(ierr); 1638 } else { 1639 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 1640 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 1641 } 1642 PetscFunctionReturn(0); 1643 } 1644 1645 #undef __FUNCT__ 1646 #define __FUNCT__ "MatSetUp_MPISBAIJ" 1647 PetscErrorCode MatSetUp_MPISBAIJ(Mat A) 1648 { 1649 PetscErrorCode ierr; 1650 1651 PetscFunctionBegin; 1652 ierr = MatMPISBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1653 PetscFunctionReturn(0); 1654 } 1655 1656 #undef __FUNCT__ 1657 #define __FUNCT__ "MatAXPY_MPISBAIJ" 1658 PetscErrorCode MatAXPY_MPISBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 1659 { 1660 PetscErrorCode ierr; 1661 Mat_MPISBAIJ *xx=(Mat_MPISBAIJ*)X->data,*yy=(Mat_MPISBAIJ*)Y->data; 1662 PetscBLASInt bnz,one=1; 1663 Mat_SeqSBAIJ *xa,*ya; 1664 Mat_SeqBAIJ *xb,*yb; 1665 1666 PetscFunctionBegin; 1667 if (str == SAME_NONZERO_PATTERN) { 1668 PetscScalar alpha = a; 1669 xa = (Mat_SeqSBAIJ*)xx->A->data; 1670 ya = (Mat_SeqSBAIJ*)yy->A->data; 1671 ierr = PetscBLASIntCast(xa->nz,&bnz);CHKERRQ(ierr); 1672 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,xa->a,&one,ya->a,&one)); 1673 xb = (Mat_SeqBAIJ*)xx->B->data; 1674 yb = (Mat_SeqBAIJ*)yy->B->data; 1675 ierr = PetscBLASIntCast(xb->nz,&bnz);CHKERRQ(ierr); 1676 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,xb->a,&one,yb->a,&one)); 1677 ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr); 1678 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 1679 ierr = MatSetOption(X,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE);CHKERRQ(ierr); 1680 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 1681 ierr = MatSetOption(X,MAT_GETROW_UPPERTRIANGULAR,PETSC_FALSE);CHKERRQ(ierr); 1682 } else { 1683 Mat B; 1684 PetscInt *nnz_d,*nnz_o,bs=Y->rmap->bs; 1685 if (bs != X->rmap->bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrices must have same block size"); 1686 ierr = MatGetRowUpperTriangular(X);CHKERRQ(ierr); 1687 ierr = MatGetRowUpperTriangular(Y);CHKERRQ(ierr); 1688 ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr); 1689 ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr); 1690 ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr); 1691 ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 1692 ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr); 1693 ierr = 
MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr); 1694 ierr = MatSetType(B,MATMPISBAIJ);CHKERRQ(ierr); 1695 ierr = MatAXPYGetPreallocation_SeqSBAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr); 1696 ierr = MatAXPYGetPreallocation_MPIBAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr); 1697 ierr = MatMPISBAIJSetPreallocation(B,bs,0,nnz_d,0,nnz_o);CHKERRQ(ierr); 1698 ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 1699 ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr); 1700 ierr = PetscFree(nnz_d);CHKERRQ(ierr); 1701 ierr = PetscFree(nnz_o);CHKERRQ(ierr); 1702 ierr = MatRestoreRowUpperTriangular(X);CHKERRQ(ierr); 1703 ierr = MatRestoreRowUpperTriangular(Y);CHKERRQ(ierr); 1704 } 1705 PetscFunctionReturn(0); 1706 } 1707 1708 #undef __FUNCT__ 1709 #define __FUNCT__ "MatGetSubMatrices_MPISBAIJ" 1710 PetscErrorCode MatGetSubMatrices_MPISBAIJ(Mat A,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat *B[]) 1711 { 1712 PetscErrorCode ierr; 1713 PetscInt i; 1714 PetscBool flg; 1715 1716 PetscFunctionBegin; 1717 ierr = MatGetSubMatrices_MPIBAIJ(A,n,irow,icol,scall,B);CHKERRQ(ierr); /* B[] are sbaij matrices */ 1718 for (i=0; i<n; i++) { 1719 ierr = ISEqual(irow[i],icol[i],&flg);CHKERRQ(ierr); 1720 if (!flg) { 1721 ierr = MatSeqSBAIJZeroOps_Private(*B[i]);CHKERRQ(ierr); 1722 } 1723 } 1724 PetscFunctionReturn(0); 1725 } 1726 1727 #undef __FUNCT__ 1728 #define __FUNCT__ "MatShift_MPISBAIJ" 1729 PetscErrorCode MatShift_MPISBAIJ(Mat Y,PetscScalar a) 1730 { 1731 PetscErrorCode ierr; 1732 Mat_MPISBAIJ *maij = (Mat_MPISBAIJ*)Y->data; 1733 Mat_SeqSBAIJ *aij = (Mat_SeqSBAIJ*)maij->A->data; 1734 1735 PetscFunctionBegin; 1736 if (!Y->preallocated) { 1737 ierr = MatMPISBAIJSetPreallocation(Y,Y->rmap->bs,1,NULL,0,NULL);CHKERRQ(ierr); 1738 } else if (!aij->nz) { 1739 PetscInt nonew = aij->nonew; 1740 ierr = MatSeqSBAIJSetPreallocation(maij->A,Y->rmap->bs,1,NULL);CHKERRQ(ierr); 1741 aij->nonew = nonew; 1742 } 1743 ierr = MatShift_Basic(Y,a);CHKERRQ(ierr); 1744 PetscFunctionReturn(0); 1745 } 1746 1747 #undef __FUNCT__ 1748 #define __FUNCT__ "MatMissingDiagonal_MPISBAIJ" 1749 PetscErrorCode MatMissingDiagonal_MPISBAIJ(Mat A,PetscBool *missing,PetscInt *d) 1750 { 1751 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1752 PetscErrorCode ierr; 1753 1754 PetscFunctionBegin; 1755 if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices"); 1756 ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr); 1757 if (d) { 1758 PetscInt rstart; 1759 ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr); 1760 *d += rstart/A->rmap->bs; 1761 1762 } 1763 PetscFunctionReturn(0); 1764 } 1765 1766 #undef __FUNCT__ 1767 #define __FUNCT__ "MatGetDiagonalBlock_MPISBAIJ" 1768 PetscErrorCode MatGetDiagonalBlock_MPISBAIJ(Mat A,Mat *a) 1769 { 1770 PetscFunctionBegin; 1771 *a = ((Mat_MPISBAIJ*)A->data)->A; 1772 PetscFunctionReturn(0); 1773 } 1774 1775 /* -------------------------------------------------------------------*/ 1776 static struct _MatOps MatOps_Values = {MatSetValues_MPISBAIJ, 1777 MatGetRow_MPISBAIJ, 1778 MatRestoreRow_MPISBAIJ, 1779 MatMult_MPISBAIJ, 1780 /* 4*/ MatMultAdd_MPISBAIJ, 1781 MatMult_MPISBAIJ, /* transpose versions are same as non-transpose */ 1782 MatMultAdd_MPISBAIJ, 1783 0, 1784 0, 1785 0, 1786 /* 10*/ 0, 1787 0, 1788 0, 1789 MatSOR_MPISBAIJ, 1790 MatTranspose_MPISBAIJ, 1791 /* 15*/ MatGetInfo_MPISBAIJ, 1792 MatEqual_MPISBAIJ, 1793 MatGetDiagonal_MPISBAIJ, 1794 MatDiagonalScale_MPISBAIJ, 1795 MatNorm_MPISBAIJ, 1796 /* 20*/ MatAssemblyBegin_MPISBAIJ, 1797 
MatAssemblyEnd_MPISBAIJ, 1798 MatSetOption_MPISBAIJ, 1799 MatZeroEntries_MPISBAIJ, 1800 /* 24*/ 0, 1801 0, 1802 0, 1803 0, 1804 0, 1805 /* 29*/ MatSetUp_MPISBAIJ, 1806 0, 1807 0, 1808 MatGetDiagonalBlock_MPISBAIJ, 1809 0, 1810 /* 34*/ MatDuplicate_MPISBAIJ, 1811 0, 1812 0, 1813 0, 1814 0, 1815 /* 39*/ MatAXPY_MPISBAIJ, 1816 MatGetSubMatrices_MPISBAIJ, 1817 MatIncreaseOverlap_MPISBAIJ, 1818 MatGetValues_MPISBAIJ, 1819 MatCopy_MPISBAIJ, 1820 /* 44*/ 0, 1821 MatScale_MPISBAIJ, 1822 MatShift_MPISBAIJ, 1823 0, 1824 0, 1825 /* 49*/ 0, 1826 0, 1827 0, 1828 0, 1829 0, 1830 /* 54*/ 0, 1831 0, 1832 MatSetUnfactored_MPISBAIJ, 1833 0, 1834 MatSetValuesBlocked_MPISBAIJ, 1835 /* 59*/ MatGetSubMatrix_MPISBAIJ, 1836 0, 1837 0, 1838 0, 1839 0, 1840 /* 64*/ 0, 1841 0, 1842 0, 1843 0, 1844 0, 1845 /* 69*/ MatGetRowMaxAbs_MPISBAIJ, 1846 0, 1847 0, 1848 0, 1849 0, 1850 /* 74*/ 0, 1851 0, 1852 0, 1853 0, 1854 0, 1855 /* 79*/ 0, 1856 0, 1857 0, 1858 0, 1859 MatLoad_MPISBAIJ, 1860 /* 84*/ 0, 1861 0, 1862 0, 1863 0, 1864 0, 1865 /* 89*/ 0, 1866 0, 1867 0, 1868 0, 1869 0, 1870 /* 94*/ 0, 1871 0, 1872 0, 1873 0, 1874 0, 1875 /* 99*/ 0, 1876 0, 1877 0, 1878 0, 1879 0, 1880 /*104*/ 0, 1881 MatRealPart_MPISBAIJ, 1882 MatImaginaryPart_MPISBAIJ, 1883 MatGetRowUpperTriangular_MPISBAIJ, 1884 MatRestoreRowUpperTriangular_MPISBAIJ, 1885 /*109*/ 0, 1886 0, 1887 0, 1888 0, 1889 MatMissingDiagonal_MPISBAIJ, 1890 /*114*/ 0, 1891 0, 1892 0, 1893 0, 1894 0, 1895 /*119*/ 0, 1896 0, 1897 0, 1898 0, 1899 0, 1900 /*124*/ 0, 1901 0, 1902 0, 1903 0, 1904 0, 1905 /*129*/ 0, 1906 0, 1907 0, 1908 0, 1909 0, 1910 /*134*/ 0, 1911 0, 1912 0, 1913 0, 1914 0, 1915 /*139*/ 0, 1916 0, 1917 0, 1918 0, 1919 0, 1920 /*144*/MatCreateMPIMatConcatenateSeqMat_MPISBAIJ 1921 }; 1922 1923 #undef __FUNCT__ 1924 #define __FUNCT__ "MatMPISBAIJSetPreallocation_MPISBAIJ" 1925 PetscErrorCode MatMPISBAIJSetPreallocation_MPISBAIJ(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt *d_nnz,PetscInt o_nz,const PetscInt *o_nnz) 1926 { 1927 Mat_MPISBAIJ *b; 1928 PetscErrorCode ierr; 1929 PetscInt i,mbs,Mbs; 1930 1931 PetscFunctionBegin; 1932 ierr = MatSetBlockSize(B,PetscAbs(bs));CHKERRQ(ierr); 1933 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 1934 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 1935 ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr); 1936 1937 b = (Mat_MPISBAIJ*)B->data; 1938 mbs = B->rmap->n/bs; 1939 Mbs = B->rmap->N/bs; 1940 if (mbs*bs != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"No of local rows %D must be divisible by blocksize %D",B->rmap->N,bs); 1941 1942 B->rmap->bs = bs; 1943 b->bs2 = bs*bs; 1944 b->mbs = mbs; 1945 b->Mbs = Mbs; 1946 b->nbs = B->cmap->n/bs; 1947 b->Nbs = B->cmap->N/bs; 1948 1949 for (i=0; i<=b->size; i++) { 1950 b->rangebs[i] = B->rmap->range[i]/bs; 1951 } 1952 b->rstartbs = B->rmap->rstart/bs; 1953 b->rendbs = B->rmap->rend/bs; 1954 1955 b->cstartbs = B->cmap->rstart/bs; 1956 b->cendbs = B->cmap->rend/bs; 1957 1958 if (!B->preallocated) { 1959 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 1960 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 1961 ierr = MatSetType(b->A,MATSEQSBAIJ);CHKERRQ(ierr); 1962 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr); 1963 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 1964 ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 1965 ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr); 1966 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr); 1967 
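/* create the stash that buffers blocked (block size bs) entries destined for other processes until assembly; non-blocked entries go through B->stash created in MatCreate_MPISBAIJ() */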
ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),bs,&B->bstash);CHKERRQ(ierr); 1968 } 1969 1970 ierr = MatSeqSBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr); 1971 ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr); 1972 1973 B->preallocated = PETSC_TRUE; 1974 PetscFunctionReturn(0); 1975 } 1976 1977 #undef __FUNCT__ 1978 #define __FUNCT__ "MatMPISBAIJSetPreallocationCSR_MPISBAIJ" 1979 PetscErrorCode MatMPISBAIJSetPreallocationCSR_MPISBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[]) 1980 { 1981 PetscInt m,rstart,cstart,cend; 1982 PetscInt i,j,d,nz,nz_max=0,*d_nnz=0,*o_nnz=0; 1983 const PetscInt *JJ =0; 1984 PetscScalar *values=0; 1985 PetscErrorCode ierr; 1986 1987 PetscFunctionBegin; 1988 if (bs < 1) SETERRQ1(PetscObjectComm((PetscObject)B),PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs); 1989 ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 1990 ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 1991 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 1992 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 1993 ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr); 1994 m = B->rmap->n/bs; 1995 rstart = B->rmap->rstart/bs; 1996 cstart = B->cmap->rstart/bs; 1997 cend = B->cmap->rend/bs; 1998 1999 if (ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %D",ii[0]); 2000 ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr); 2001 for (i=0; i<m; i++) { 2002 nz = ii[i+1] - ii[i]; 2003 if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nz); 2004 nz_max = PetscMax(nz_max,nz); 2005 JJ = jj + ii[i]; 2006 for (j=0; j<nz; j++) { 2007 if (*JJ >= cstart) break; 2008 JJ++; 2009 } 2010 d = 0; 2011 for (; j<nz; j++) { 2012 if (*JJ++ >= cend) break; 2013 d++; 2014 } 2015 d_nnz[i] = d; 2016 o_nnz[i] = nz - d; 2017 } 2018 ierr = MatMPISBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 2019 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 2020 2021 values = (PetscScalar*)V; 2022 if (!values) { 2023 ierr = PetscMalloc1(bs*bs*nz_max,&values);CHKERRQ(ierr); 2024 ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr); 2025 } 2026 for (i=0; i<m; i++) { 2027 PetscInt row = i + rstart; 2028 PetscInt ncols = ii[i+1] - ii[i]; 2029 const PetscInt *icols = jj + ii[i]; 2030 const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0); 2031 ierr = MatSetValuesBlocked_MPISBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr); 2032 } 2033 2034 if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); } 2035 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2036 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2037 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 2038 PetscFunctionReturn(0); 2039 } 2040 2041 /*MC 2042 MATMPISBAIJ - MATMPISBAIJ = "mpisbaij" - A matrix type to be used for distributed symmetric sparse block matrices, 2043 based on block compressed sparse row format. Only the upper triangular portion of the "diagonal" portion of 2044 the matrix is stored. 2045 2046 For complex numbers by default this matrix is symmetric, NOT Hermitian symmetric. To make it Hermitian symmetric you 2047 can call MatSetOption(Mat, MAT_HERMITIAN); 2048 2049 Options Database Keys: 2050 . 
-mat_type mpisbaij - sets the matrix type to "mpisbaij" during a call to MatSetFromOptions() 2051 2052 Level: beginner 2053 2054 .seealso: MatCreateMPISBAIJ 2055 M*/ 2056 2057 PETSC_INTERN PetscErrorCode MatConvert_MPISBAIJ_MPISBSTRM(Mat,MatType,MatReuse,Mat*); 2058 2059 #undef __FUNCT__ 2060 #define __FUNCT__ "MatCreate_MPISBAIJ" 2061 PETSC_EXTERN PetscErrorCode MatCreate_MPISBAIJ(Mat B) 2062 { 2063 Mat_MPISBAIJ *b; 2064 PetscErrorCode ierr; 2065 PetscBool flg = PETSC_FALSE; 2066 2067 PetscFunctionBegin; 2068 ierr = PetscNewLog(B,&b);CHKERRQ(ierr); 2069 B->data = (void*)b; 2070 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 2071 2072 B->ops->destroy = MatDestroy_MPISBAIJ; 2073 B->ops->view = MatView_MPISBAIJ; 2074 B->assembled = PETSC_FALSE; 2075 B->insertmode = NOT_SET_VALUES; 2076 2077 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr); 2078 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&b->size);CHKERRQ(ierr); 2079 2080 /* build local table of row and column ownerships */ 2081 ierr = PetscMalloc1(b->size+2,&b->rangebs);CHKERRQ(ierr); 2082 2083 /* build cache for off array entries formed */ 2084 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr); 2085 2086 b->donotstash = PETSC_FALSE; 2087 b->colmap = NULL; 2088 b->garray = NULL; 2089 b->roworiented = PETSC_TRUE; 2090 2091 /* stuff used in block assembly */ 2092 b->barray = 0; 2093 2094 /* stuff used for matrix vector multiply */ 2095 b->lvec = 0; 2096 b->Mvctx = 0; 2097 b->slvec0 = 0; 2098 b->slvec0b = 0; 2099 b->slvec1 = 0; 2100 b->slvec1a = 0; 2101 b->slvec1b = 0; 2102 b->sMvctx = 0; 2103 2104 /* stuff for MatGetRow() */ 2105 b->rowindices = 0; 2106 b->rowvalues = 0; 2107 b->getrowactive = PETSC_FALSE; 2108 2109 /* hash table stuff */ 2110 b->ht = 0; 2111 b->hd = 0; 2112 b->ht_size = 0; 2113 b->ht_flag = PETSC_FALSE; 2114 b->ht_fact = 0; 2115 b->ht_total_ct = 0; 2116 b->ht_insert_ct = 0; 2117 2118 /* stuff for MatGetSubMatrices_MPIBAIJ_local() */ 2119 b->ijonly = PETSC_FALSE; 2120 2121 b->in_loc = 0; 2122 b->v_loc = 0; 2123 b->n_loc = 0; 2124 2125 ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPISBAIJ);CHKERRQ(ierr); 2126 ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPISBAIJ);CHKERRQ(ierr); 2127 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPISBAIJSetPreallocation_C",MatMPISBAIJSetPreallocation_MPISBAIJ);CHKERRQ(ierr); 2128 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPISBAIJSetPreallocationCSR_C",MatMPISBAIJSetPreallocationCSR_MPISBAIJ);CHKERRQ(ierr); 2129 #if defined(PETSC_HAVE_ELEMENTAL) 2130 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpisbaij_elemental_C",MatConvert_MPISBAIJ_Elemental);CHKERRQ(ierr); 2131 #endif 2132 2133 B->symmetric = PETSC_TRUE; 2134 B->structurally_symmetric = PETSC_TRUE; 2135 B->symmetric_set = PETSC_TRUE; 2136 B->structurally_symmetric_set = PETSC_TRUE; 2137 2138 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPISBAIJ);CHKERRQ(ierr); 2139 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Options for loading MPISBAIJ matrix 1","Mat");CHKERRQ(ierr); 2140 ierr = PetscOptionsBool("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",flg,&flg,NULL);CHKERRQ(ierr); 2141 if (flg) { 2142 PetscReal fact = 1.39; 2143 ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr); 2144 ierr = PetscOptionsReal("-mat_use_hash_table","Use hash 
table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,NULL);CHKERRQ(ierr); 2145 if (fact <= 1.0) fact = 1.39; 2146 ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr); 2147 ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr); 2148 } 2149 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2150 PetscFunctionReturn(0); 2151 } 2152 2153 /*MC 2154 MATSBAIJ - MATSBAIJ = "sbaij" - A matrix type to be used for symmetric block sparse matrices. 2155 2156 This matrix type is identical to MATSEQSBAIJ when constructed with a single process communicator, 2157 and MATMPISBAIJ otherwise. 2158 2159 Options Database Keys: 2160 . -mat_type sbaij - sets the matrix type to "sbaij" during a call to MatSetFromOptions() 2161 2162 Level: beginner 2163 2164 .seealso: MatCreateMPISBAIJ,MATSEQSBAIJ,MATMPISBAIJ 2165 M*/ 2166 2167 #undef __FUNCT__ 2168 #define __FUNCT__ "MatMPISBAIJSetPreallocation" 2169 /*@C 2170 MatMPISBAIJSetPreallocation - For good matrix assembly performance 2171 the user should preallocate the matrix storage by setting the parameters 2172 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2173 performance can be increased by more than a factor of 50. 2174 2175 Collective on Mat 2176 2177 Input Parameters: 2178 + B - the matrix 2179 . bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row 2180 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs() 2181 . d_nz - number of block nonzeros per block row in diagonal portion of local 2182 submatrix (same for all local rows) 2183 . d_nnz - array containing the number of block nonzeros in the various block rows 2184 in the upper triangular and diagonal part of the in diagonal portion of the local 2185 (possibly different for each block row) or NULL. If you plan to factor the matrix you must leave room 2186 for the diagonal entry and set a value even if it is zero. 2187 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 2188 submatrix (same for all local rows). 2189 - o_nnz - array containing the number of nonzeros in the various block rows of the 2190 off-diagonal portion of the local submatrix that is right of the diagonal 2191 (possibly different for each block row) or NULL. 2192 2193 2194 Options Database Keys: 2195 . -mat_no_unroll - uses code that does not unroll the loops in the 2196 block calculations (much slower) 2197 . -mat_block_size - size of the blocks to use 2198 2199 Notes: 2200 2201 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 2202 than it must be used on all processors that share the object for that argument. 2203 2204 If the *_nnz parameter is given then the *_nz parameter is ignored 2205 2206 Storage Information: 2207 For a square global matrix we define each processor's diagonal portion 2208 to be its local rows and the corresponding columns (a square submatrix); 2209 each processor's off-diagonal portion encompasses the remainder of the 2210 local matrix (a rectangular submatrix). 2211 2212 The user can specify preallocated storage for the diagonal part of 2213 the local submatrix with either d_nz or d_nnz (not both). Set 2214 d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic 2215 memory allocation. Likewise, specify preallocated storage for the 2216 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 
2217 2218 You can call MatGetInfo() to get information on how effective the preallocation was; 2219 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 2220 You can also run with the option -info and look for messages with the string 2221 malloc in them to see if additional memory allocation was needed. 2222 2223 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 2224 the figure below we depict these three local rows and all columns (0-11). 2225 2226 .vb 2227 0 1 2 3 4 5 6 7 8 9 10 11 2228 -------------------------- 2229 row 3 |. . . d d d o o o o o o 2230 row 4 |. . . d d d o o o o o o 2231 row 5 |. . . d d d o o o o o o 2232 -------------------------- 2233 .ve 2234 2235 Thus, any entries in the d locations are stored in the d (diagonal) 2236 submatrix, and any entries in the o locations are stored in the 2237 o (off-diagonal) submatrix. Note that the d matrix is stored in 2238 MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format. 2239 2240 Now d_nz should indicate the number of block nonzeros per row in the upper triangular 2241 plus the diagonal part of the d matrix, 2242 and o_nz should indicate the number of block nonzeros per row in the o matrix 2243 2244 In general, for PDE problems in which most nonzeros are near the diagonal, 2245 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 2246 or you will get TERRIBLE performance; see the users' manual chapter on 2247 matrices. 2248 2249 Level: intermediate 2250 2251 .keywords: matrix, block, aij, compressed row, sparse, parallel 2252 2253 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateBAIJ(), PetscSplitOwnership() 2254 @*/ 2255 PetscErrorCode MatMPISBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 2256 { 2257 PetscErrorCode ierr; 2258 2259 PetscFunctionBegin; 2260 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 2261 PetscValidType(B,1); 2262 PetscValidLogicalCollectiveInt(B,bs,2); 2263 ierr = PetscTryMethod(B,"MatMPISBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,bs,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr); 2264 PetscFunctionReturn(0); 2265 } 2266 2267 #undef __FUNCT__ 2268 #define __FUNCT__ "MatCreateSBAIJ" 2269 /*@C 2270 MatCreateSBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format 2271 (block compressed row). For good matrix assembly performance 2272 the user should preallocate the matrix storage by setting the parameters 2273 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2274 performance can be increased by more than a factor of 50. 2275 2276 Collective on MPI_Comm 2277 2278 Input Parameters: 2279 + comm - MPI communicator 2280 . bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row 2281 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs() 2282 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 2283 This value should be the same as the local size used in creating the 2284 y vector for the matrix-vector product y = Ax. 2285 . n - number of local columns (or PETSC_DECIDE to have calculated if N is given) 2286 This value should be the same as the local size used in creating the 2287 x vector for the matrix-vector product y = Ax. 2288 . 
M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 2289 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 2290 . d_nz - number of block nonzeros per block row in diagonal portion of local 2291 submatrix (same for all local rows) 2292 . d_nnz - array containing the number of block nonzeros in the various block rows 2293 in the upper triangular portion of the diagonal portion of the local submatrix 2294 (possibly different for each block row) or NULL. 2295 If you plan to factor the matrix you must leave room for the diagonal entry and 2296 set its value even if it is zero. 2297 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 2298 submatrix (same for all local rows). 2299 - o_nnz - array containing the number of block nonzeros in the various block rows of the 2300 off-diagonal portion of the local submatrix (possibly different for 2301 each block row) or NULL. 2302 2303 Output Parameter: 2304 . A - the matrix 2305 2306 Options Database Keys: 2307 . -mat_no_unroll - uses code that does not unroll the loops in the 2308 block calculations (much slower) 2309 . -mat_block_size - size of the blocks to use 2310 . -mat_mpi - use the parallel matrix data structures even on one processor 2311 (defaults to using SeqBAIJ format on one processor) 2312 2313 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 2314 MatXXXXSetPreallocation() paradigm instead of this routine directly. 2315 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 2316 2317 Notes: 2318 The number of rows and columns must be divisible by blocksize. 2319 This matrix type does not support complex Hermitian operations. 2320 2321 The user MUST specify either the local or global matrix dimensions 2322 (possibly both). 2323 2324 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 2325 then it must be used on all processors that share the object for that argument. 2326 2327 If the *_nnz parameter is given then the *_nz parameter is ignored 2328 2329 Storage Information: 2330 For a square global matrix we define each processor's diagonal portion 2331 to be its local rows and the corresponding columns (a square submatrix); 2332 each processor's off-diagonal portion encompasses the remainder of the 2333 local matrix (a rectangular submatrix). 2334 2335 The user can specify preallocated storage for the diagonal part of 2336 the local submatrix with either d_nz or d_nnz (not both). Set 2337 d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic 2338 memory allocation. Likewise, specify preallocated storage for the 2339 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 2340 2341 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 2342 the figure below we depict these three local rows and all columns (0-11). 2343 2344 .vb 2345 0 1 2 3 4 5 6 7 8 9 10 11 2346 -------------------------- 2347 row 3 |. . . d d d o o o o o o 2348 row 4 |. . . d d d o o o o o o 2349 row 5 |. . . d d d o o o o o o 2350 -------------------------- 2351 .ve 2352 2353 Thus, any entries in the d locations are stored in the d (diagonal) 2354 submatrix, and any entries in the o locations are stored in the 2355 o (off-diagonal) submatrix. Note that the d matrix is stored in 2356 MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format.
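   For the rows depicted above, one possible preallocation (purely illustrative, assuming bs=1 and that every marked d/o location is nonzero; only the upper triangle of the d part, including the diagonal, is counted for the symmetric storage) would be
.vb
     PetscInt d_nnz[] = {3,2,1};   /* upper-triangular entries of rows 3,4,5 in the d part */
     PetscInt o_nnz[] = {6,6,6};   /* entries in columns 6-11, to the right of the diagonal block */
.ve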
2357 2358 Now d_nz should indicate the number of block nonzeros per row in the upper triangular 2359 plus the diagonal part of the d matrix, 2360 and o_nz should indicate the number of block nonzeros per row in the o matrix. 2361 In general, for PDE problems in which most nonzeros are near the diagonal, 2362 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 2363 or you will get TERRIBLE performance; see the users' manual chapter on 2364 matrices. 2365 2366 Level: intermediate 2367 2368 .keywords: matrix, block, aij, compressed row, sparse, parallel 2369 2370 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateBAIJ() 2371 @*/ 2372 2373 PetscErrorCode MatCreateSBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 2374 { 2375 PetscErrorCode ierr; 2376 PetscMPIInt size; 2377 2378 PetscFunctionBegin; 2379 ierr = MatCreate(comm,A);CHKERRQ(ierr); 2380 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 2381 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2382 if (size > 1) { 2383 ierr = MatSetType(*A,MATMPISBAIJ);CHKERRQ(ierr); 2384 ierr = MatMPISBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 2385 } else { 2386 ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr); 2387 ierr = MatSeqSBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr); 2388 } 2389 PetscFunctionReturn(0); 2390 } 2391 2392 2393 #undef __FUNCT__ 2394 #define __FUNCT__ "MatDuplicate_MPISBAIJ" 2395 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 2396 { 2397 Mat mat; 2398 Mat_MPISBAIJ *a,*oldmat = (Mat_MPISBAIJ*)matin->data; 2399 PetscErrorCode ierr; 2400 PetscInt len=0,nt,bs=matin->rmap->bs,mbs=oldmat->mbs; 2401 PetscScalar *array; 2402 2403 PetscFunctionBegin; 2404 *newmat = 0; 2405 2406 ierr = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr); 2407 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 2408 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 2409 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 2410 ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr); 2411 ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr); 2412 2413 mat->factortype = matin->factortype; 2414 mat->preallocated = PETSC_TRUE; 2415 mat->assembled = PETSC_TRUE; 2416 mat->insertmode = NOT_SET_VALUES; 2417 2418 a = (Mat_MPISBAIJ*)mat->data; 2419 a->bs2 = oldmat->bs2; 2420 a->mbs = oldmat->mbs; 2421 a->nbs = oldmat->nbs; 2422 a->Mbs = oldmat->Mbs; 2423 a->Nbs = oldmat->Nbs; 2424 2425 2426 a->size = oldmat->size; 2427 a->rank = oldmat->rank; 2428 a->donotstash = oldmat->donotstash; 2429 a->roworiented = oldmat->roworiented; 2430 a->rowindices = 0; 2431 a->rowvalues = 0; 2432 a->getrowactive = PETSC_FALSE; 2433 a->barray = 0; 2434 a->rstartbs = oldmat->rstartbs; 2435 a->rendbs = oldmat->rendbs; 2436 a->cstartbs = oldmat->cstartbs; 2437 a->cendbs = oldmat->cendbs; 2438 2439 /* hash table stuff */ 2440 a->ht = 0; 2441 a->hd = 0; 2442 a->ht_size = 0; 2443 a->ht_flag = oldmat->ht_flag; 2444 a->ht_fact = oldmat->ht_fact; 2445 a->ht_total_ct = 0; 2446 a->ht_insert_ct = 0; 2447 2448 ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+2)*sizeof(PetscInt));CHKERRQ(ierr); 2449 if (oldmat->colmap) { 2450 #if defined(PETSC_USE_CTABLE) 2451 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 2452 #else 2453 ierr = 
PetscMalloc1(a->Nbs,&a->colmap);CHKERRQ(ierr); 2454 ierr = PetscLogObjectMemory((PetscObject)mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr); 2455 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr); 2456 #endif 2457 } else a->colmap = 0; 2458 2459 if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) { 2460 ierr = PetscMalloc1(len,&a->garray);CHKERRQ(ierr); 2461 ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr); 2462 ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); 2463 } else a->garray = 0; 2464 2465 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)matin),matin->rmap->bs,&mat->bstash);CHKERRQ(ierr); 2466 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 2467 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr); 2468 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 2469 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr); 2470 2471 ierr = VecDuplicate(oldmat->slvec0,&a->slvec0);CHKERRQ(ierr); 2472 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0);CHKERRQ(ierr); 2473 ierr = VecDuplicate(oldmat->slvec1,&a->slvec1);CHKERRQ(ierr); 2474 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1);CHKERRQ(ierr); 2475 2476 ierr = VecGetLocalSize(a->slvec1,&nt);CHKERRQ(ierr); 2477 ierr = VecGetArray(a->slvec1,&array);CHKERRQ(ierr); 2478 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,bs*mbs,array,&a->slvec1a);CHKERRQ(ierr); 2479 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,nt-bs*mbs,array+bs*mbs,&a->slvec1b);CHKERRQ(ierr); 2480 ierr = VecRestoreArray(a->slvec1,&array);CHKERRQ(ierr); 2481 ierr = VecGetArray(a->slvec0,&array);CHKERRQ(ierr); 2482 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,nt-bs*mbs,array+bs*mbs,&a->slvec0b);CHKERRQ(ierr); 2483 ierr = VecRestoreArray(a->slvec0,&array);CHKERRQ(ierr); 2484 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0);CHKERRQ(ierr); 2485 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1);CHKERRQ(ierr); 2486 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0b);CHKERRQ(ierr); 2487 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1a);CHKERRQ(ierr); 2488 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1b);CHKERRQ(ierr); 2489 2490 /* ierr = VecScatterCopy(oldmat->sMvctx,&a->sMvctx); - not written yet, replaced by the lazy trick: */ 2491 ierr = PetscObjectReference((PetscObject)oldmat->sMvctx);CHKERRQ(ierr); 2492 a->sMvctx = oldmat->sMvctx; 2493 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->sMvctx);CHKERRQ(ierr); 2494 2495 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 2496 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr); 2497 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 2498 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr); 2499 ierr = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 2500 *newmat = mat; 2501 PetscFunctionReturn(0); 2502 } 2503 2504 #undef __FUNCT__ 2505 #define __FUNCT__ "MatLoad_MPISBAIJ" 2506 PetscErrorCode MatLoad_MPISBAIJ(Mat newmat,PetscViewer viewer) 2507 { 2508 PetscErrorCode ierr; 2509 PetscInt i,nz,j,rstart,rend; 2510 PetscScalar *vals,*buf; 2511 MPI_Comm comm; 2512 MPI_Status status; 2513 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag,*sndcounts = 
0,*browners,maxnz,*rowners,mmbs; 2514 PetscInt header[4],*rowlengths = 0,M,N,m,*cols,*locrowlens; 2515 PetscInt *procsnz = 0,jj,*mycols,*ibuf; 2516 PetscInt bs = newmat->rmap->bs,Mbs,mbs,extra_rows; 2517 PetscInt *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount; 2518 PetscInt dcount,kmax,k,nzcount,tmp; 2519 int fd; 2520 2521 PetscFunctionBegin; 2522 /* force binary viewer to load .info file if it has not yet done so */ 2523 ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr); 2524 ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr); 2525 ierr = PetscOptionsBegin(comm,NULL,"Options for loading MPISBAIJ matrix 2","Mat");CHKERRQ(ierr); 2526 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr); 2527 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2528 if (bs < 0) bs = 1; 2529 2530 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2531 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2532 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2533 if (!rank) { 2534 ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr); 2535 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 2536 if (header[3] < 0) SETERRQ(PetscObjectComm((PetscObject)newmat),PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as MPISBAIJ"); 2537 } 2538 2539 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 2540 M = header[1]; 2541 N = header[2]; 2542 2543 /* If global sizes are set, check if they are consistent with that given in the file */ 2544 if (newmat->rmap->N >= 0 && newmat->rmap->N != M) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%D) and input matrix has (%D)",newmat->rmap->N,M); 2545 if (newmat->cmap->N >= 0 && newmat->cmap->N != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%D) and input matrix has (%D)",newmat->cmap->N,N); 2546 2547 if (M != N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only do square matrices"); 2548 2549 /* 2550 This code adds extra rows to make sure the number of rows is 2551 divisible by the blocksize 2552 */ 2553 Mbs = M/bs; 2554 extra_rows = bs - M + bs*(Mbs); 2555 if (extra_rows == bs) extra_rows = 0; 2556 else Mbs++; 2557 if (extra_rows &&!rank) { 2558 ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr); 2559 } 2560 2561 /* determine ownership of all rows */ 2562 if (newmat->rmap->n < 0) { /* PETSC_DECIDE */ 2563 mbs = Mbs/size + ((Mbs % size) > rank); 2564 m = mbs*bs; 2565 } else { /* User Set */ 2566 m = newmat->rmap->n; 2567 mbs = m/bs; 2568 } 2569 ierr = PetscMalloc2(size+1,&rowners,size+1,&browners);CHKERRQ(ierr); 2570 ierr = PetscMPIIntCast(mbs,&mmbs);CHKERRQ(ierr); 2571 ierr = MPI_Allgather(&mmbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 2572 rowners[0] = 0; 2573 for (i=2; i<=size; i++) rowners[i] += rowners[i-1]; 2574 for (i=0; i<=size; i++) browners[i] = rowners[i]*bs; 2575 rstart = rowners[rank]; 2576 rend = rowners[rank+1]; 2577 2578 /* distribute row lengths to all processors */ 2579 ierr = PetscMalloc1((rend-rstart)*bs,&locrowlens);CHKERRQ(ierr); 2580 if (!rank) { 2581 ierr = PetscMalloc1(M+extra_rows,&rowlengths);CHKERRQ(ierr); 2582 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 2583 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 2584 ierr = PetscMalloc1(size,&sndcounts);CHKERRQ(ierr); 2585 for (i=0; i<size; i++) sndcounts[i] = 
browners[i+1] - browners[i]; 2586 ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr); 2587 ierr = PetscFree(sndcounts);CHKERRQ(ierr); 2588 } else { 2589 ierr = MPI_Scatterv(0,0,0,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr); 2590 } 2591 2592 if (!rank) { /* procs[0] */ 2593 /* calculate the number of nonzeros on each processor */ 2594 ierr = PetscMalloc1(size,&procsnz);CHKERRQ(ierr); 2595 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 2596 for (i=0; i<size; i++) { 2597 for (j=rowners[i]*bs; j< rowners[i+1]*bs; j++) { 2598 procsnz[i] += rowlengths[j]; 2599 } 2600 } 2601 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2602 2603 /* determine max buffer needed and allocate it */ 2604 maxnz = 0; 2605 for (i=0; i<size; i++) { 2606 maxnz = PetscMax(maxnz,procsnz[i]); 2607 } 2608 ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr); 2609 2610 /* read in my part of the matrix column indices */ 2611 nz = procsnz[0]; 2612 ierr = PetscMalloc1(nz,&ibuf);CHKERRQ(ierr); 2613 mycols = ibuf; 2614 if (size == 1) nz -= extra_rows; 2615 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 2616 if (size == 1) { 2617 for (i=0; i< extra_rows; i++) mycols[nz+i] = M+i; 2618 } 2619 2620 /* read in every ones (except the last) and ship off */ 2621 for (i=1; i<size-1; i++) { 2622 nz = procsnz[i]; 2623 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2624 ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2625 } 2626 /* read in the stuff for the last proc */ 2627 if (size != 1) { 2628 nz = procsnz[size-1] - extra_rows; /* the extra rows are not on the disk */ 2629 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2630 for (i=0; i<extra_rows; i++) cols[nz+i] = M+i; 2631 ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr); 2632 } 2633 ierr = PetscFree(cols);CHKERRQ(ierr); 2634 } else { /* procs[i], i>0 */ 2635 /* determine buffer space needed for message */ 2636 nz = 0; 2637 for (i=0; i<m; i++) nz += locrowlens[i]; 2638 ierr = PetscMalloc1(nz,&ibuf);CHKERRQ(ierr); 2639 mycols = ibuf; 2640 /* receive message of column indices*/ 2641 ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 2642 ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr); 2643 if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2644 } 2645 2646 /* loop over local rows, determining number of off diagonal entries */ 2647 ierr = PetscMalloc2(rend-rstart,&dlens,rend-rstart,&odlens);CHKERRQ(ierr); 2648 ierr = PetscMalloc3(Mbs,&mask,Mbs,&masked1,Mbs,&masked2);CHKERRQ(ierr); 2649 ierr = PetscMemzero(mask,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 2650 ierr = PetscMemzero(masked1,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 2651 ierr = PetscMemzero(masked2,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 2652 rowcount = 0; 2653 nzcount = 0; 2654 for (i=0; i<mbs; i++) { 2655 dcount = 0; 2656 odcount = 0; 2657 for (j=0; j<bs; j++) { 2658 kmax = locrowlens[rowcount]; 2659 for (k=0; k<kmax; k++) { 2660 tmp = mycols[nzcount++]/bs; /* block col. 
index */ 2661 if (!mask[tmp]) { 2662 mask[tmp] = 1; 2663 if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; /* entry in off-diag portion */ 2664 else masked1[dcount++] = tmp; /* entry in diag portion */ 2665 } 2666 } 2667 rowcount++; 2668 } 2669 2670 dlens[i] = dcount; /* d_nzz[i] */ 2671 odlens[i] = odcount; /* o_nzz[i] */ 2672 2673 /* zero out the mask elements we set */ 2674 for (j=0; j<dcount; j++) mask[masked1[j]] = 0; 2675 for (j=0; j<odcount; j++) mask[masked2[j]] = 0; 2676 } 2677 ierr = MatSetSizes(newmat,m,m,M+extra_rows,N+extra_rows);CHKERRQ(ierr); 2678 ierr = MatMPISBAIJSetPreallocation(newmat,bs,0,dlens,0,odlens);CHKERRQ(ierr); 2679 ierr = MatSetOption(newmat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE);CHKERRQ(ierr); 2680 2681 if (!rank) { 2682 ierr = PetscMalloc1(maxnz,&buf);CHKERRQ(ierr); 2683 /* read in my part of the matrix numerical values */ 2684 nz = procsnz[0]; 2685 vals = buf; 2686 mycols = ibuf; 2687 if (size == 1) nz -= extra_rows; 2688 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2689 if (size == 1) { 2690 for (i=0; i< extra_rows; i++) vals[nz+i] = 1.0; 2691 } 2692 2693 /* insert into matrix */ 2694 jj = rstart*bs; 2695 for (i=0; i<m; i++) { 2696 ierr = MatSetValues(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 2697 mycols += locrowlens[i]; 2698 vals += locrowlens[i]; 2699 jj++; 2700 } 2701 2702 /* read in other processors (except the last one) and ship out */ 2703 for (i=1; i<size-1; i++) { 2704 nz = procsnz[i]; 2705 vals = buf; 2706 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2707 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr); 2708 } 2709 /* the last proc */ 2710 if (size != 1) { 2711 nz = procsnz[i] - extra_rows; 2712 vals = buf; 2713 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2714 for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0; 2715 ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr); 2716 } 2717 ierr = PetscFree(procsnz);CHKERRQ(ierr); 2718 2719 } else { 2720 /* receive numeric values */ 2721 ierr = PetscMalloc1(nz,&buf);CHKERRQ(ierr); 2722 2723 /* receive message of values*/ 2724 vals = buf; 2725 mycols = ibuf; 2726 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm,&status);CHKERRQ(ierr); 2727 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 2728 if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2729 2730 /* insert into matrix */ 2731 jj = rstart*bs; 2732 for (i=0; i<m; i++) { 2733 ierr = MatSetValues_MPISBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 2734 mycols += locrowlens[i]; 2735 vals += locrowlens[i]; 2736 jj++; 2737 } 2738 } 2739 2740 ierr = PetscFree(locrowlens);CHKERRQ(ierr); 2741 ierr = PetscFree(buf);CHKERRQ(ierr); 2742 ierr = PetscFree(ibuf);CHKERRQ(ierr); 2743 ierr = PetscFree2(rowners,browners);CHKERRQ(ierr); 2744 ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr); 2745 ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr); 2746 ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2747 ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2748 PetscFunctionReturn(0); 2749 } 2750 2751 #undef __FUNCT__ 2752 #define __FUNCT__ "MatMPISBAIJSetHashTableFactor" 2753 /*XXXXX@ 2754 MatMPISBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable. 2755 2756 Input Parameters: 2757 . mat - the matrix 2758 . 
fact - factor 2759 2760 Not Collective on Mat, each process can have a different hash factor 2761 2762 Level: advanced 2763 2764 Notes: 2765 This can also be set by the command line option: -mat_use_hash_table fact 2766 2767 .keywords: matrix, hashtable, factor, HT 2768 2769 .seealso: MatSetOption() 2770 @XXXXX*/ 2771 2772 2773 #undef __FUNCT__ 2774 #define __FUNCT__ "MatGetRowMaxAbs_MPISBAIJ" 2775 PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat A,Vec v,PetscInt idx[]) 2776 { 2777 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 2778 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(a->B)->data; 2779 PetscReal atmp; 2780 PetscReal *work,*svalues,*rvalues; 2781 PetscErrorCode ierr; 2782 PetscInt i,bs,mbs,*bi,*bj,brow,j,ncols,krow,kcol,col,row,Mbs,bcol; 2783 PetscMPIInt rank,size; 2784 PetscInt *rowners_bs,dest,count,source; 2785 PetscScalar *va; 2786 MatScalar *ba; 2787 MPI_Status stat; 2788 2789 PetscFunctionBegin; 2790 if (idx) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Send email to petsc-maint@mcs.anl.gov"); 2791 ierr = MatGetRowMaxAbs(a->A,v,NULL);CHKERRQ(ierr); 2792 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2793 2794 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr); 2795 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr); 2796 2797 bs = A->rmap->bs; 2798 mbs = a->mbs; 2799 Mbs = a->Mbs; 2800 ba = b->a; 2801 bi = b->i; 2802 bj = b->j; 2803 2804 /* find ownerships */ 2805 rowners_bs = A->rmap->range; 2806 2807 /* each proc creates an array to be distributed */ 2808 ierr = PetscMalloc1(bs*Mbs,&work);CHKERRQ(ierr); 2809 ierr = PetscMemzero(work,bs*Mbs*sizeof(PetscReal));CHKERRQ(ierr); 2810 2811 /* row_max for B */ 2812 if (rank != size-1) { 2813 for (i=0; i<mbs; i++) { 2814 ncols = bi[1] - bi[0]; bi++; 2815 brow = bs*i; 2816 for (j=0; j<ncols; j++) { 2817 bcol = bs*(*bj); 2818 for (kcol=0; kcol<bs; kcol++) { 2819 col = bcol + kcol; /* local col index */ 2820 col += rowners_bs[rank+1]; /* global col index */ 2821 for (krow=0; krow<bs; krow++) { 2822 atmp = PetscAbsScalar(*ba); ba++; 2823 row = brow + krow; /* local row index */ 2824 if (PetscRealPart(va[row]) < atmp) va[row] = atmp; 2825 if (work[col] < atmp) work[col] = atmp; 2826 } 2827 } 2828 bj++; 2829 } 2830 } 2831 2832 /* send values to its owners */ 2833 for (dest=rank+1; dest<size; dest++) { 2834 svalues = work + rowners_bs[dest]; 2835 count = rowners_bs[dest+1]-rowners_bs[dest]; 2836 ierr = MPI_Send(svalues,count,MPIU_REAL,dest,rank,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2837 } 2838 } 2839 2840 /* receive values */ 2841 if (rank) { 2842 rvalues = work; 2843 count = rowners_bs[rank+1]-rowners_bs[rank]; 2844 for (source=0; source<rank; source++) { 2845 ierr = MPI_Recv(rvalues,count,MPIU_REAL,MPI_ANY_SOURCE,MPI_ANY_TAG,PetscObjectComm((PetscObject)A),&stat);CHKERRQ(ierr); 2846 /* process values */ 2847 for (i=0; i<count; i++) { 2848 if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i]; 2849 } 2850 } 2851 } 2852 2853 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2854 ierr = PetscFree(work);CHKERRQ(ierr); 2855 PetscFunctionReturn(0); 2856 } 2857 2858 #undef __FUNCT__ 2859 #define __FUNCT__ "MatSOR_MPISBAIJ" 2860 PetscErrorCode MatSOR_MPISBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 2861 { 2862 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data; 2863 PetscErrorCode ierr; 2864 PetscInt mbs=mat->mbs,bs=matin->rmap->bs; 2865 PetscScalar *x,*ptr,*from; 2866 Vec bb1; 2867 const PetscScalar *b; 2868 2869 PetscFunctionBegin; 2870 if (its <= 0 || lits <= 0) 
SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 2871 if (bs > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented"); 2872 2873 if (flag == SOR_APPLY_UPPER) { 2874 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2875 PetscFunctionReturn(0); 2876 } 2877 2878 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 2879 if (flag & SOR_ZERO_INITIAL_GUESS) { 2880 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr); 2881 its--; 2882 } 2883 2884 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 2885 while (its--) { 2886 2887 /* lower triangular part: slvec0b = - B^T*xx */ 2888 ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr); 2889 2890 /* copy xx into slvec0a */ 2891 ierr = VecGetArray(mat->slvec0,&ptr);CHKERRQ(ierr); 2892 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 2893 ierr = PetscMemcpy(ptr,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 2894 ierr = VecRestoreArray(mat->slvec0,&ptr);CHKERRQ(ierr); 2895 2896 ierr = VecScale(mat->slvec0,-1.0);CHKERRQ(ierr); 2897 2898 /* copy bb into slvec1a */ 2899 ierr = VecGetArray(mat->slvec1,&ptr);CHKERRQ(ierr); 2900 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 2901 ierr = PetscMemcpy(ptr,b,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 2902 ierr = VecRestoreArray(mat->slvec1,&ptr);CHKERRQ(ierr); 2903 2904 /* set slvec1b = 0 */ 2905 ierr = VecSet(mat->slvec1b,0.0);CHKERRQ(ierr); 2906 2907 ierr = VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2908 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 2909 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 2910 ierr = VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2911 2912 /* upper triangular part: bb1 = bb1 - B*x */ 2913 ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,bb1);CHKERRQ(ierr); 2914 2915 /* local diagonal sweep */ 2916 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr); 2917 } 2918 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 2919 } else if ((flag & SOR_LOCAL_FORWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) { 2920 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2921 } else if ((flag & SOR_LOCAL_BACKWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) { 2922 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2923 } else if (flag & SOR_EISENSTAT) { 2924 Vec xx1; 2925 PetscBool hasop; 2926 const PetscScalar *diag; 2927 PetscScalar *sl,scale = (omega - 2.0)/omega; 2928 PetscInt i,n; 2929 2930 if (!mat->xx1) { 2931 ierr = VecDuplicate(bb,&mat->xx1);CHKERRQ(ierr); 2932 ierr = VecDuplicate(bb,&mat->bb1);CHKERRQ(ierr); 2933 } 2934 xx1 = mat->xx1; 2935 bb1 = mat->bb1; 2936 2937 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr); 2938 2939 if (!mat->diag) { 2940 /* this is wrong for same matrix with new nonzero values */ 2941 ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr); 2942 ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr); 2943 } 2944 ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr); 2945 2946 if (hasop) { 2947 ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr); 2948 ierr = VecAYPX(mat->slvec1a,scale,bb);CHKERRQ(ierr); 2949 } else { 2950 /* 2951 These 
two lines are replaced by code that may be a bit faster for a good compiler 2952 ierr = VecPointwiseMult(mat->slvec1a,mat->diag,xx);CHKERRQ(ierr); 2953 ierr = VecAYPX(mat->slvec1a,scale,bb);CHKERRQ(ierr); 2954 */ 2955 ierr = VecGetArray(mat->slvec1a,&sl);CHKERRQ(ierr); 2956 ierr = VecGetArrayRead(mat->diag,&diag);CHKERRQ(ierr); 2957 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 2958 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 2959 ierr = VecGetLocalSize(xx,&n);CHKERRQ(ierr); 2960 if (omega == 1.0) { 2961 for (i=0; i<n; i++) sl[i] = b[i] - diag[i]*x[i]; 2962 ierr = PetscLogFlops(2.0*n);CHKERRQ(ierr); 2963 } else { 2964 for (i=0; i<n; i++) sl[i] = b[i] + scale*diag[i]*x[i]; 2965 ierr = PetscLogFlops(3.0*n);CHKERRQ(ierr); 2966 } 2967 ierr = VecRestoreArray(mat->slvec1a,&sl);CHKERRQ(ierr); 2968 ierr = VecRestoreArrayRead(mat->diag,&diag);CHKERRQ(ierr); 2969 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 2970 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 2971 } 2972 2973 /* multiply off-diagonal portion of matrix */ 2974 ierr = VecSet(mat->slvec1b,0.0);CHKERRQ(ierr); 2975 ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr); 2976 ierr = VecGetArray(mat->slvec0,&from);CHKERRQ(ierr); 2977 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 2978 ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 2979 ierr = VecRestoreArray(mat->slvec0,&from);CHKERRQ(ierr); 2980 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 2981 ierr = VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2982 ierr = VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2983 ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,mat->slvec1a);CHKERRQ(ierr); 2984 2985 /* local sweep */ 2986 ierr = (*mat->A->ops->sor)(mat->A,mat->slvec1a,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr); 2987 ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr); 2988 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format"); 2989 PetscFunctionReturn(0); 2990 } 2991 2992 #undef __FUNCT__ 2993 #define __FUNCT__ "MatSOR_MPISBAIJ_2comm" 2994 PetscErrorCode MatSOR_MPISBAIJ_2comm(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 2995 { 2996 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data; 2997 PetscErrorCode ierr; 2998 Vec lvec1,bb1; 2999 3000 PetscFunctionBegin; 3001 if (its <= 0 || lits <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 3002 if (matin->rmap->bs > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented"); 3003 3004 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 3005 if (flag & SOR_ZERO_INITIAL_GUESS) { 3006 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr); 3007 its--; 3008 } 3009 3010 ierr = VecDuplicate(mat->lvec,&lvec1);CHKERRQ(ierr); 3011 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 3012 while (its--) { 3013 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3014 3015 /* lower diagonal part: bb1 = bb - B^T*xx */ 3016 ierr = (*mat->B->ops->multtranspose)(mat->B,xx,lvec1);CHKERRQ(ierr); 3017 ierr = VecScale(lvec1,-1.0);CHKERRQ(ierr); 3018 3019 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3020 ierr = VecCopy(bb,bb1);CHKERRQ(ierr); 3021 ierr = 
VecScatterBegin(mat->Mvctx,lvec1,bb1,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3022 3023 /* upper diagonal part: bb1 = bb1 - B*x */ 3024 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 3025 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr); 3026 3027 ierr = VecScatterEnd(mat->Mvctx,lvec1,bb1,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3028 3029 /* diagonal sweep */ 3030 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr); 3031 } 3032 ierr = VecDestroy(&lvec1);CHKERRQ(ierr); 3033 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 3034 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format"); 3035 PetscFunctionReturn(0); 3036 } 3037 3038 #undef __FUNCT__ 3039 #define __FUNCT__ "MatCreateMPISBAIJWithArrays" 3040 /*@ 3041 MatCreateMPISBAIJWithArrays - creates a MPI SBAIJ matrix using arrays that contain in standard 3042 CSR format the local rows. 3043 3044 Collective on MPI_Comm 3045 3046 Input Parameters: 3047 + comm - MPI communicator 3048 . bs - the block size, only a block size of 1 is supported 3049 . m - number of local rows (Cannot be PETSC_DECIDE) 3050 . n - This value should be the same as the local size used in creating the 3051 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 3052 calculated if N is given) For square matrices n is almost always m. 3053 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 3054 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 3055 . i - row indices 3056 . j - column indices 3057 - a - matrix values 3058 3059 Output Parameter: 3060 . mat - the matrix 3061 3062 Level: intermediate 3063 3064 Notes: 3065 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 3066 thus you CANNOT change the matrix entries by changing the values of a[] after you have 3067 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3068 3069 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 3070 3071 .keywords: matrix, aij, compressed row, sparse, parallel 3072 3073 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 3074 MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays() 3075 @*/ 3076 PetscErrorCode MatCreateMPISBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 3077 { 3078 PetscErrorCode ierr; 3079 3080 3081 PetscFunctionBegin; 3082 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 3083 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 3084 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3085 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 3086 ierr = MatSetType(*mat,MATMPISBAIJ);CHKERRQ(ierr); 3087 ierr = MatMPISBAIJSetPreallocationCSR(*mat,bs,i,j,a);CHKERRQ(ierr); 3088 PetscFunctionReturn(0); 3089 } 3090 3091 3092 #undef __FUNCT__ 3093 #define __FUNCT__ "MatMPISBAIJSetPreallocationCSR" 3094 /*@C 3095 MatMPISBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in BAIJ format 3096 (the default parallel PETSc format). 3097 3098 Collective on MPI_Comm 3099 3100 Input Parameters: 3101 + B - the matrix 3102 . bs - the block size 3103 . 

#undef __FUNCT__
#define __FUNCT__ "MatMPISBAIJSetPreallocationCSR"
/*@C
   MatMPISBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in SBAIJ format
   (symmetric block compressed row; only the upper triangular portion of each block row is stored).

   Collective on MPI_Comm

   Input Parameters:
+  B - the matrix
.  bs - the block size
.  i - the indices into j for the start of each local row (starts with zero)
.  j - the column indices for each local row (starts with zero); these must be sorted within each row
-  v - optional values in the matrix

   Level: developer

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ
@*/
PetscErrorCode MatMPISBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[],const PetscScalar v[])
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscTryMethod(B,"MatMPISBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
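
/*
   Example (illustrative sketch only, not part of the original source): the call sequence used by
   MatCreateMPISBAIJWithArrays() above, spelled out for a matrix B. The variables m, n, bs, i, j and
   v are placeholders for per-rank CSR data describing the upper triangular part of the locally
   owned block rows.

     ierr = MatCreate(PETSC_COMM_WORLD,&B);CHKERRQ(ierr);
     ierr = MatSetSizes(B,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
     ierr = MatSetType(B,MATMPISBAIJ);CHKERRQ(ierr);
     ierr = MatMPISBAIJSetPreallocationCSR(B,bs,i,j,v);CHKERRQ(ierr);

   Since the values argument is optional, passing v as NULL preallocates the nonzero structure only,
   leaving the entries to be set later with MatSetValues().
*/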

#undef __FUNCT__
#define __FUNCT__ "MatCreateMPIMatConcatenateSeqMat_MPISBAIJ"
PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPISBAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
{
  PetscErrorCode ierr;
  PetscInt       m,N,i,rstart,nnz,Ii,bs,cbs;
  PetscInt       *indx;
  PetscScalar    *values;

  PetscFunctionBegin;
  ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
  if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
    Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)inmat->data;
    PetscInt     *dnz,*onz,sum,bs,cbs,mbs,Nbs;
    PetscInt     *bindx,rmax=a->rmax,j;

    ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
    mbs = m/bs; Nbs = N/cbs;
    if (n == PETSC_DECIDE) {
      ierr = PetscSplitOwnership(comm,&n,&Nbs);CHKERRQ(ierr);
    }
    /* Check sum(n) = Nbs */
    ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
    if (sum != Nbs) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %d",Nbs);

    ierr    = MPI_Scan(&mbs,&rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
    rstart -= mbs;

    ierr = PetscMalloc1(rmax,&bindx);CHKERRQ(ierr);
    ierr = MatPreallocateInitialize(comm,mbs,n,dnz,onz);CHKERRQ(ierr);
    ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);
    for (i=0; i<mbs; i++) {
      ierr = MatGetRow_SeqSBAIJ(inmat,i*bs,&nnz,&indx,NULL);CHKERRQ(ierr); /* non-blocked nnz and indx */
      nnz  = nnz/bs;
      for (j=0; j<nnz; j++) bindx[j] = indx[j*bs]/bs;
      ierr = MatPreallocateSet(i+rstart,nnz,bindx,dnz,onz);CHKERRQ(ierr);
      ierr = MatRestoreRow_SeqSBAIJ(inmat,i*bs,&nnz,&indx,NULL);CHKERRQ(ierr);
    }
    ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_FALSE);CHKERRQ(ierr);
    ierr = PetscFree(bindx);CHKERRQ(ierr);

    ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
    ierr = MatSetSizes(*outmat,m,n*bs,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
    ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
    ierr = MatSetType(*outmat,MATMPISBAIJ);CHKERRQ(ierr);
    ierr = MatMPISBAIJSetPreallocation(*outmat,bs,0,dnz,0,onz);CHKERRQ(ierr);
    ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
  }

  /* numeric phase */
  ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr);

  ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);
  for (i=0; i<m; i++) {
    ierr = MatGetRow_SeqSBAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
    Ii   = i + rstart;
    ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
    ierr = MatRestoreRow_SeqSBAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
  }
  ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_FALSE);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
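
/*
   Usage note (sketch, not part of the original source): this routine is normally reached through the
   generic MatCreateMPIMatConcatenateSeqMat() interface, which stacks the per-rank sequential matrices
   (here SeqSBAIJ) into a single parallel matrix. A hypothetical call, with seqA a SeqSBAIJ matrix
   owned by each rank:

     ierr = MatCreateMPIMatConcatenateSeqMat(PETSC_COMM_WORLD,seqA,PETSC_DECIDE,MAT_INITIAL_MATRIX,&mpiA);CHKERRQ(ierr);
*/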