#include <../src/mat/impls/baij/mpi/mpibaij.h> /*I "petscmat.h" I*/
#include <../src/mat/impls/sbaij/mpi/mpisbaij.h>
#include <../src/mat/impls/sbaij/seq/sbaij.h>
#include <petscblaslapack.h>

#if defined(PETSC_HAVE_ELEMENTAL)
PETSC_EXTERN PetscErrorCode MatConvert_MPISBAIJ_Elemental(Mat,MatType,MatReuse,Mat*);
#endif

#undef __FUNCT__
#define __FUNCT__ "MatStoreValues_MPISBAIJ"
PetscErrorCode MatStoreValues_MPISBAIJ(Mat mat)
{
  Mat_MPISBAIJ   *aij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
  ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRetrieveValues_MPISBAIJ"
PetscErrorCode MatRetrieveValues_MPISBAIJ(Mat mat)
{
  Mat_MPISBAIJ   *aij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
  ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
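
/*
   A minimal usage sketch (illustrative, not part of this file): the two wrappers
   above forward MatStoreValues()/MatRetrieveValues() to the diagonal (A) and
   off-diagonal (B) blocks.  Assumes mat is an assembled MATMPISBAIJ whose
   nonzero pattern is frozen first:

     ierr = MatSetOption(mat,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);CHKERRQ(ierr);
     ierr = MatStoreValues(mat);CHKERRQ(ierr);     (snapshot the current values)
     ... modify the values, e.g. add a shift, run a solve ...
     ierr = MatRetrieveValues(mat);CHKERRQ(ierr);  (restore the snapshot)
*/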
#define MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv,orow,ocol) \
  { \
    brow = row/bs;  \
    rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
    rmax = aimax[brow]; nrow = ailen[brow]; \
    bcol = col/bs; \
    ridx = row % bs; cidx = col % bs; \
    low  = 0; high = nrow; \
    while (high-low > 3) { \
      t = (low+high)/2; \
      if (rp[t] > bcol) high = t; \
      else              low  = t; \
    } \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap = ap + bs2*_i + bs*cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value; \
        else                    *bap  = value; \
        goto a_noinsert; \
      } \
    } \
    if (a->nonew == 1) goto a_noinsert; \
    if (a->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \
    N = nrow++ - 1; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp[ii+1] = rp[ii]; \
      ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
    } \
    if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); } \
    rp[_i]                      = bcol; \
    ap[bs2*_i + bs*cidx + ridx] = value; \
    A->nonzerostate++;\
    a_noinsert:; \
    ailen[brow] = nrow; \
  }

#define MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv,orow,ocol) \
  { \
    brow = row/bs; \
    rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
    rmax = bimax[brow]; nrow = bilen[brow]; \
    bcol = col/bs; \
    ridx = row % bs; cidx = col % bs; \
    low  = 0; high = nrow; \
    while (high-low > 3) { \
      t = (low+high)/2; \
      if (rp[t] > bcol) high = t; \
      else              low  = t; \
    } \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap = ap + bs2*_i + bs*cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value; \
        else                    *bap  = value; \
        goto b_noinsert; \
      } \
    } \
    if (b->nonew == 1) goto b_noinsert; \
    if (b->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \
    N = nrow++ - 1; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp[ii+1] = rp[ii]; \
      ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
    } \
    if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); } \
    rp[_i]                      = bcol; \
    ap[bs2*_i + bs*cidx + ridx] = value; \
    B->nonzerostate++;\
    b_noinsert:; \
    bilen[brow] = nrow; \
  }

/*
   Only add/insert a(i,j) with i<=j (blocks).
   Any a(i,j) with i>j input by the user is ignored (or generates an error,
   depending on the MAT_IGNORE_LOWER_TRIANGULAR option).
*/
#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPISBAIJ"
PetscErrorCode MatSetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  MatScalar      value;
  PetscBool      roworiented = baij->roworiented;
  PetscErrorCode ierr;
  PetscInt       i,j,row,col;
  PetscInt       rstart_orig=mat->rmap->rstart;
  PetscInt       rend_orig  =mat->rmap->rend,cstart_orig=mat->cmap->rstart;
  PetscInt       cend_orig  =mat->cmap->rend,bs=mat->rmap->bs;

  /* Some variables required in the macros */
  Mat          A     = baij->A;
  Mat_SeqSBAIJ *a    = (Mat_SeqSBAIJ*)(A)->data;
  PetscInt     *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
  MatScalar    *aa   =a->a;

  Mat         B     = baij->B;
  Mat_SeqBAIJ *b    = (Mat_SeqBAIJ*)(B)->data;
  PetscInt    *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
  MatScalar   *ba   =b->a;

  PetscInt  *rp,ii,nrow,_i,rmax,N,brow,bcol;
  PetscInt  low,high,t,ridx,cidx,bs2=a->bs2;
  MatScalar *ap,*bap;

  /* for stash */
  PetscInt  n_loc, *in_loc = NULL;
  MatScalar *v_loc = NULL;

  PetscFunctionBegin;
  if (!baij->donotstash) {
    if (n > baij->n_loc) {
      ierr = PetscFree(baij->in_loc);CHKERRQ(ierr);
      ierr = PetscFree(baij->v_loc);CHKERRQ(ierr);
      ierr = PetscMalloc1(n,&baij->in_loc);CHKERRQ(ierr);
      ierr = PetscMalloc1(n,&baij->v_loc);CHKERRQ(ierr);

      baij->n_loc = n;
    }
    in_loc = baij->in_loc;
    v_loc  = baij->v_loc;
  }

  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
    if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor's entry */
      row = im[i] - rstart_orig;                     /* local row index */
      for (j=0; j<n; j++) {
        if (im[i]/bs > in[j]/bs) {
          if (a->ignore_ltriangular) {
            continue; /* ignore lower triangular blocks */
          } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. To ignore these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
        }
        if (in[j] >= cstart_orig && in[j] < cend_orig) { /* diagonal-part entry (A) */
          col  = in[j] - cstart_orig;                    /* local col index */
          brow = row/bs; bcol = col/bs;
          if (brow > bcol) continue; /* ignore lower triangular blocks of A */
          if (roworiented) value = v[i*n+j];
          else             value = v[i+j*m];
          MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv,im[i],in[j]);
          /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
#endif
        else { /* off-diagonal-part entry (B) */
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined(PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
            col  = col - 1;
#else
            col = baij->colmap[in[j]/bs] - 1;
#endif
            if (col < 0 && !((Mat_SeqSBAIJ*)(baij->A->data))->nonew) {
              ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqSBAIJ_B_Private() */
              B     = baij->B;
              b     = (Mat_SeqBAIJ*)(B)->data;
              bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
              ba    = b->a;
            } else col += in[j]%bs;
          } else col = in[j];
          if (roworiented) value = v[i*n+j];
          else             value = v[i+j*m];
          MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv,im[i],in[j]);
          /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        }
      }
    } else { /* off-processor entry */
      if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!baij->donotstash) {
        mat->assembled = PETSC_FALSE;
        n_loc          = 0;
        for (j=0; j<n; j++) {
          if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */
          in_loc[n_loc] = in[j];
          if (roworiented) {
            v_loc[n_loc] = v[i*n+j];
          } else {
            v_loc[n_loc] = v[j*m+i];
          }
          n_loc++;
        }
        ierr = MatStashValuesRow_Private(&mat->stash,im[i],n_loc,in_loc,v_loc,PETSC_FALSE);CHKERRQ(ierr);
      }
    }
  }
  PetscFunctionReturn(0);
}
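
/*
   A usage sketch for the routine above (values and indices are illustrative):
   only entries in the upper block triangle (block row <= block column) are
   stored; with the option below, lower triangular input is silently dropped
   instead of raising an error.

     Mat         mat;                 assumed created as MATMPISBAIJ with bs=1
     PetscInt    i = 0, j = 2;        global row/column, i <= j
     PetscScalar val = 3.0;
     ierr = MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);
     ierr = MatSetValues(mat,1,&i,1,&j,&val,INSERT_VALUES);CHKERRQ(ierr);
     ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/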
#undef __FUNCT__
#define __FUNCT__ "MatSetValuesBlocked_SeqSBAIJ_Inlined"
PETSC_STATIC_INLINE PetscErrorCode MatSetValuesBlocked_SeqSBAIJ_Inlined(Mat A,PetscInt row,PetscInt col,const PetscScalar v[],InsertMode is,PetscInt orow,PetscInt ocol)
{
  Mat_SeqSBAIJ      *a = (Mat_SeqSBAIJ*)A->data;
  PetscErrorCode    ierr;
  PetscInt          *rp,low,high,t,ii,jj,nrow,i,rmax,N;
  PetscInt          *imax=a->imax,*ai=a->i,*ailen=a->ilen;
  PetscInt          *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs;
  PetscBool         roworiented=a->roworiented;
  const PetscScalar *value = v;
  MatScalar         *ap,*aa = a->a,*bap;

  PetscFunctionBegin;
  if (col < row) {
    if (a->ignore_ltriangular) PetscFunctionReturn(0); /* ignore lower triangular block */
    else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. To ignore these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
  }
  rp    = aj + ai[row];
  ap    = aa + bs2*ai[row];
  rmax  = imax[row];
  nrow  = ailen[row];
  value = v;
  low   = 0;
  high  = nrow;

  while (high-low > 7) {
    t = (low+high)/2;
    if (rp[t] > col) high = t;
    else             low  = t;
  }
  for (i=low; i<high; i++) {
    if (rp[i] > col) break;
    if (rp[i] == col) {
      bap = ap + bs2*i;
      if (roworiented) {
        if (is == ADD_VALUES) {
          for (ii=0; ii<bs; ii++) {
            for (jj=ii; jj<bs2; jj+=bs) {
              bap[jj] += *value++;
            }
          }
        } else {
          for (ii=0; ii<bs; ii++) {
            for (jj=ii; jj<bs2; jj+=bs) {
              bap[jj] = *value++;
            }
          }
        }
      } else {
        if (is == ADD_VALUES) {
          for (ii=0; ii<bs; ii++) {
            for (jj=0; jj<bs; jj++) {
              *bap++ += *value++;
            }
          }
        } else {
          for (ii=0; ii<bs; ii++) {
            for (jj=0; jj<bs; jj++) {
              *bap++ = *value++;
            }
          }
        }
      }
      goto noinsert2;
    }
  }
  if (nonew == 1) goto noinsert2;
  if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero block at block row/column (%D, %D) in the matrix", orow, ocol);
  MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
  N = nrow++ - 1; high++;
  /* shift up all the later entries in this row */
  for (ii=N; ii>=i; ii--) {
    rp[ii+1] = rp[ii];
    ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
  }
  if (N >= i) {
    ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
  }
  rp[i] = col;
  bap   = ap + bs2*i;
  if (roworiented) {
    for (ii=0; ii<bs; ii++) {
      for (jj=ii; jj<bs2; jj+=bs) {
        bap[jj] = *value++;
      }
    }
  } else {
    for (ii=0; ii<bs; ii++) {
      for (jj=0; jj<bs; jj++) {
        *bap++ = *value++;
      }
    }
  }
  noinsert2:;
  ailen[row] = nrow;
  PetscFunctionReturn(0);
}
#undef __FUNCT__
#define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ_Inlined"
/*
  This routine is exactly duplicated in mpibaij.c
*/
PETSC_STATIC_INLINE PetscErrorCode MatSetValuesBlocked_SeqBAIJ_Inlined(Mat A,PetscInt row,PetscInt col,const PetscScalar v[],InsertMode is,PetscInt orow,PetscInt ocol)
{
  Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
  PetscInt          *rp,low,high,t,ii,jj,nrow,i,rmax,N;
  PetscInt          *imax=a->imax,*ai=a->i,*ailen=a->ilen;
  PetscErrorCode    ierr;
  PetscInt          *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs;
  PetscBool         roworiented=a->roworiented;
  const PetscScalar *value = v;
  MatScalar         *ap,*aa = a->a,*bap;

  PetscFunctionBegin;
  rp    = aj + ai[row];
  ap    = aa + bs2*ai[row];
  rmax  = imax[row];
  nrow  = ailen[row];
  low   = 0;
  high  = nrow;
  value = v;
  while (high-low > 7) {
    t = (low+high)/2;
    if (rp[t] > col) high = t;
    else             low  = t;
  }
  for (i=low; i<high; i++) {
    if (rp[i] > col) break;
    if (rp[i] == col) {
      bap = ap + bs2*i;
      if (roworiented) {
        if (is == ADD_VALUES) {
          for (ii=0; ii<bs; ii++) {
            for (jj=ii; jj<bs2; jj+=bs) {
              bap[jj] += *value++;
            }
          }
        } else {
          for (ii=0; ii<bs; ii++) {
            for (jj=ii; jj<bs2; jj+=bs) {
              bap[jj] = *value++;
            }
          }
        }
      } else {
        if (is == ADD_VALUES) {
          for (ii=0; ii<bs; ii++,value+=bs) {
            for (jj=0; jj<bs; jj++) {
              bap[jj] += value[jj];
            }
            bap += bs;
          }
        } else {
          for (ii=0; ii<bs; ii++,value+=bs) {
            for (jj=0; jj<bs; jj++) {
              bap[jj] = value[jj];
            }
            bap += bs;
          }
        }
      }
      goto noinsert2;
    }
  }
  if (nonew == 1) goto noinsert2;
  if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero block at global block row/column (%D, %D) in the matrix", orow, ocol);
  MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
  N = nrow++ - 1; high++;
  /* shift up all the later entries in this row */
  for (ii=N; ii>=i; ii--) {
    rp[ii+1] = rp[ii];
    ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
  }
  if (N >= i) {
    ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
  }
  rp[i] = col;
  bap   = ap + bs2*i;
  if (roworiented) {
    for (ii=0; ii<bs; ii++) {
      for (jj=ii; jj<bs2; jj+=bs) {
        bap[jj] = *value++;
      }
    }
  } else {
    for (ii=0; ii<bs; ii++) {
      for (jj=0; jj<bs; jj++) {
        *bap++ = *value++;
      }
    }
  }
  noinsert2:;
  ailen[row] = nrow;
  PetscFunctionReturn(0);
}
#undef __FUNCT__
#define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ"
/*
    This routine could be optimized by removing the need for the block copy below and passing stride information
  to the above inline routines; similarly in MatSetValuesBlocked_MPIBAIJ()
*/
PetscErrorCode MatSetValuesBlocked_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
{
  Mat_MPISBAIJ    *baij = (Mat_MPISBAIJ*)mat->data;
  const MatScalar *value;
  MatScalar       *barray     = baij->barray;
  PetscBool       roworiented = baij->roworiented,ignore_ltriangular = ((Mat_SeqSBAIJ*)baij->A->data)->ignore_ltriangular;
  PetscErrorCode  ierr;
  PetscInt        i,j,ii,jj,row,col,rstart=baij->rstartbs;
  PetscInt        rend=baij->rendbs,cstart=baij->rstartbs,stepval; /* the matrix is square, so column block ranges equal row block ranges */
  PetscInt        cend=baij->rendbs,bs=mat->rmap->bs,bs2=baij->bs2;

  PetscFunctionBegin;
  if (!barray) {
    ierr = PetscMalloc1(bs2,&barray);CHKERRQ(ierr);
    baij->barray = barray;
  }

  if (roworiented) {
    stepval = (n-1)*bs;
  } else {
    stepval = (m-1)*bs;
  }
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block indexed row too large %D max %D",im[i],baij->Mbs-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row = im[i] - rstart;
      for (j=0; j<n; j++) {
        if (im[i] > in[j]) {
          if (ignore_ltriangular) continue; /* ignore lower triangular blocks */
          else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. To ignore these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
        }
        /* If there is a single block column (row-oriented) or a single block row
           (column-oriented), the input block is already contiguous and no copy is required */
        if ((roworiented) && (n == 1)) {
          barray = (MatScalar*) v + i*bs2;
        } else if ((!roworiented) && (m == 1)) {
          barray = (MatScalar*) v + j*bs2;
        } else { /* Here a copy is required */
          if (roworiented) {
            value = v + i*(stepval+bs)*bs + j*bs;
          } else {
            value = v + j*(stepval+bs)*bs + i*bs;
          }
          for (ii=0; ii<bs; ii++,value+=stepval) {
            for (jj=0; jj<bs; jj++) {
              *barray++ = *value++;
            }
          }
          barray -= bs2;
        }

        if (in[j] >= cstart && in[j] < cend) {
          col  = in[j] - cstart;
          ierr = MatSetValuesBlocked_SeqSBAIJ_Inlined(baij->A,row,col,barray,addv,im[i],in[j]);CHKERRQ(ierr);
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block indexed column too large %D max %D",in[j],baij->Nbs-1);
#endif
        else {
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }

#if defined(PETSC_USE_DEBUG)
#if defined(PETSC_USE_CTABLE)
            { PetscInt data;
              ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
              if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
            }
#else
            if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
#endif
#endif
#if defined(PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col  = (col - 1)/bs;
#else
            col = (baij->colmap[in[j]] - 1)/bs;
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
              ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
            }
          } else col = in[j];
          ierr = MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B,row,col,barray,addv,im[i],in[j]);CHKERRQ(ierr);
        }
      }
    } else {
      if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process block indexed row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}
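
/*
   A usage sketch for the routine above (indices and values are illustrative,
   assuming mat was created with block size 2): MatSetValuesBlocked() takes
   block indices and a dense bs*bs block of values, here row-oriented, and the
   block must lie in the upper block triangle (block row <= block column).

     PetscInt    im = 1, in = 3;
     PetscScalar vblock[4] = {1.0,2.0,2.0,4.0};  bs*bs = 4 entries
     ierr = MatSetValuesBlocked(mat,1,&im,1,&in,vblock,INSERT_VALUES);CHKERRQ(ierr);
*/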
#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPISBAIJ"
PetscErrorCode MatGetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       bs = mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend;
  PetscInt       bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); */
    if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
    if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
      row = idxm[i] - bsrstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column %D",idxn[j]); */
        if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
        if (idxn[j] >= bscstart && idxn[j] < bscend) {
          col  = idxn[j] - bscstart;
          ierr = MatGetValues_SeqSBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!baij->colmap) {
            ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined(PETSC_USE_CTABLE)
          ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
          data--;
#else
          data = baij->colmap[idxn[j]/bs]-1;
#endif
          if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
          else {
            col  = data + idxn[j]%bs;
            ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
  }
  PetscFunctionReturn(0);
}
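
/*
   A usage sketch for the routine above (row/col are illustrative and must be
   locally owned): MatGetValues() on an assembled MATMPISBAIJ returns 0.0 for
   requested entries that are outside the stored nonzero pattern.

     PetscInt    row, col;   a locally owned (row,col) pair
     PetscScalar val;
     ierr = MatGetValues(mat,1,&row,1,&col,&val);CHKERRQ(ierr);
*/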
#undef __FUNCT__
#define __FUNCT__ "MatNorm_MPISBAIJ"
PetscErrorCode MatNorm_MPISBAIJ(Mat mat,NormType type,PetscReal *norm)
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscReal      sum[2],*lnorm2;

  PetscFunctionBegin;
  if (baij->size == 1) {
    ierr = MatNorm(baij->A,type,norm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      ierr    = PetscMalloc1(2,&lnorm2);CHKERRQ(ierr);
      ierr    = MatNorm(baij->A,type,lnorm2);CHKERRQ(ierr);
      *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2++;            /* square of the norm of A */
      ierr    = MatNorm(baij->B,type,lnorm2);CHKERRQ(ierr);
      *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2--;            /* square of the norm of B */
      ierr    = MPI_Allreduce(lnorm2,sum,2,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      /* each off-diagonal block is stored once but appears twice in the symmetric matrix */
      *norm   = PetscSqrtReal(sum[0] + 2*sum[1]);
      ierr    = PetscFree(lnorm2);CHKERRQ(ierr);
    } else if (type == NORM_INFINITY || type == NORM_1) { /* max row/column sum */
      Mat_SeqSBAIJ *amat=(Mat_SeqSBAIJ*)baij->A->data;
      Mat_SeqBAIJ  *bmat=(Mat_SeqBAIJ*)baij->B->data;
      PetscReal    *rsum,*rsum2,vabs;
      PetscInt     *jj,*garray=baij->garray,rstart=baij->rstartbs,nz;
      PetscInt     brow,bcol,col,bs=baij->A->rmap->bs,row,grow,gcol,mbs=amat->mbs;
      MatScalar    *v;

      ierr = PetscMalloc2(mat->cmap->N,&rsum,mat->cmap->N,&rsum2);CHKERRQ(ierr);
      ierr = PetscMemzero(rsum,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
      /* Amat */
      v = amat->a; jj = amat->j;
      for (brow=0; brow<mbs; brow++) {
        grow = bs*(rstart + brow);
        nz   = amat->i[brow+1] - amat->i[brow];
        for (bcol=0; bcol<nz; bcol++) {
          gcol = bs*(rstart + *jj); jj++;
          for (col=0; col<bs; col++) {
            for (row=0; row<bs; row++) {
              vabs            = PetscAbsScalar(*v); v++;
              rsum[gcol+col] += vabs;
              /* non-diagonal block */
              if (bcol > 0 && vabs > 0.0) rsum[grow+row] += vabs;
            }
          }
        }
      }
      /* Bmat */
      v = bmat->a; jj = bmat->j;
      for (brow=0; brow<mbs; brow++) {
        grow = bs*(rstart + brow);
        nz   = bmat->i[brow+1] - bmat->i[brow];
        for (bcol=0; bcol<nz; bcol++) {
          gcol = bs*garray[*jj]; jj++;
          for (col=0; col<bs; col++) {
            for (row=0; row<bs; row++) {
              vabs            = PetscAbsScalar(*v); v++;
              rsum[gcol+col] += vabs;
              rsum[grow+row] += vabs;
            }
          }
        }
      }
      ierr  = MPI_Allreduce(rsum,rsum2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      *norm = 0.0;
      for (col=0; col<mat->cmap->N; col++) {
        if (rsum2[col] > *norm) *norm = rsum2[col];
      }
      ierr = PetscFree2(rsum,rsum2);CHKERRQ(ierr);
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for this norm yet");
  }
  PetscFunctionReturn(0);
}
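
/*
   A usage sketch for the routine above: with this symmetric format the parallel
   Frobenius norm is reduced as sqrt(sum_i ||A_i||_F^2 + 2*sum_i ||B_i||_F^2),
   since each off-diagonal block is stored on exactly one rank.

     PetscReal nrm;
     ierr = MatNorm(mat,NORM_FROBENIUS,&nrm);CHKERRQ(ierr);
*/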
#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPISBAIJ"
PetscErrorCode MatAssemblyBegin_MPISBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;
  InsertMode     addv;

  PetscFunctionBegin;
  if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);

  /* make sure all processors are either in INSERT_VALUES or ADD_VALUES mode */
  ierr = MPI_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
  ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(mat,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPISBAIJ"
PetscErrorCode MatAssemblyEnd_MPISBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPISBAIJ   *baij=(Mat_MPISBAIJ*)mat->data;
  Mat_SeqSBAIJ   *a   =(Mat_SeqSBAIJ*)baij->A->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
  PetscInt       *row,*col;
  PetscBool      other_disassembled;
  PetscMPIInt    n;
  PetscBool      r1,r2,r3;
  MatScalar      *val;
  InsertMode     addv = mat->insertmode;

  /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
  PetscFunctionBegin;
  if (!baij->donotstash && !mat->nooffprocentries) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i    = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
    /* Now process the block-stash. Since the values are stashed column-oriented,
       set the row-oriented flags to PETSC_FALSE, and restore the original flags
       after MatSetValues() */
    r1 = baij->roworiented;
    r2 = a->roworiented;
    r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented;

    baij->roworiented = PETSC_FALSE;
    a->roworiented    = PETSC_FALSE;

    ((Mat_SeqBAIJ*)baij->B->data)->roworiented = PETSC_FALSE; /* b->roworiented */
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        ierr = MatSetValuesBlocked_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
        i    = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);

    baij->roworiented = r1;
    a->roworiented    = r2;

    ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworiented */
  }

  ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled; if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if the nonzero structure of submatrix B cannot change then we know that
     no processor disassembled, thus we can skip this stuff
  */
  if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
    }
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPISBAIJ(mat);CHKERRQ(ierr); /* setup Mvctx and sMvctx */
  }
  ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);

  ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);

  baij->rowvalues = 0;

  /* if no new nonzero locations are allowed in the matrix then only set the matrix state the first time through */
  if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
    PetscObjectState state = baij->A->nonzerostate + baij->B->nonzerostate;
    ierr = MPI_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
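
/*
   A usage sketch of the assembly sequence driven by the two routines above
   (i, j, v are illustrative): off-process entries stashed during MatSetValues()
   are communicated in MatAssemblyBegin() and unpacked here, before the local
   A and B parts are themselves assembled.

     ierr = MatSetValues(mat,1,&i,1,&j,&v,ADD_VALUES);CHKERRQ(ierr);   may be off-process
     ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/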
extern PetscErrorCode MatView_SeqSBAIJ_ASCII(Mat,PetscViewer);
extern PetscErrorCode MatSetValues_MPIBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);

#include <petscdraw.h>
#undef __FUNCT__
#define __FUNCT__ "MatView_MPISBAIJ_ASCIIorDraworSocket"
static PetscErrorCode MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPISBAIJ      *baij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode    ierr;
  PetscInt          bs   = mat->rmap->bs;
  PetscMPIInt       rank = baij->rank;
  PetscBool         iascii,isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo info;
      ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,mat->rmap->bs,(PetscInt)info.memory);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
      ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(0);
    }
  }

  if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
    if (isnull) PetscFunctionReturn(0);
  }

  {
    /* assemble the entire matrix onto the first processor. */
    Mat          A;
    Mat_SeqSBAIJ *Aloc;
    Mat_SeqBAIJ  *Bloc;
    PetscInt     M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
    MatScalar    *a;
    const char   *matname;

    /* Should this be the same type as mat? */
    ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
    } else {
      ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
    }
    ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
    ierr = MatMPISBAIJSetPreallocation(A,mat->rmap->bs,0,NULL,0,NULL);CHKERRQ(ierr);
    ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);

    /* copy over the A part */
    Aloc = (Mat_SeqSBAIJ*)baij->A->data;
    ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
    ierr = PetscMalloc1(bs,&rvals);CHKERRQ(ierr);

    for (i=0; i<mbs; i++) {
      rvals[0] = bs*(baij->rstartbs + i);
      for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
      for (j=ai[i]; j<ai[i+1]; j++) {
        col = (baij->cstartbs+aj[j])*bs;
        for (k=0; k<bs; k++) {
          ierr = MatSetValues_MPISBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++;
          a += bs;
        }
      }
    }
    /* copy over the B part */
    Bloc = (Mat_SeqBAIJ*)baij->B->data;
    ai   = Bloc->i; aj = Bloc->j; a = Bloc->a;
    for (i=0; i<mbs; i++) {
      rvals[0] = bs*(baij->rstartbs + i);
      for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
      for (j=ai[i]; j<ai[i+1]; j++) {
        col = baij->garray[aj[j]]*bs;
        for (k=0; k<bs; k++) {
          ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++;
          a += bs;
        }
      }
    }
    ierr = PetscFree(rvals);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
    ierr = PetscObjectGetName((PetscObject)mat,&matname);CHKERRQ(ierr);
    if (!rank) {
      ierr = PetscObjectSetName((PetscObject)((Mat_MPISBAIJ*)(A->data))->A,matname);CHKERRQ(ierr);
      ierr = MatView_SeqSBAIJ_ASCII(((Mat_MPISBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
    ierr = MatDestroy(&A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
#undef __FUNCT__
#define __FUNCT__ "MatView_MPISBAIJ"
PetscErrorCode MatView_MPISBAIJ(Mat mat,PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscBool      iascii,isdraw,issocket,isbinary;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
  if (iascii || isdraw || issocket || isbinary) {
    ierr = MatView_MPISBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPISBAIJ"
PetscErrorCode MatDestroy_MPISBAIJ(Mat mat)
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
  ierr = MatDestroy(&baij->A);CHKERRQ(ierr);
  ierr = MatDestroy(&baij->B);CHKERRQ(ierr);
#if defined(PETSC_USE_CTABLE)
  ierr = PetscTableDestroy(&baij->colmap);CHKERRQ(ierr);
#else
  ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
#endif
  ierr = PetscFree(baij->garray);CHKERRQ(ierr);
  ierr = VecDestroy(&baij->lvec);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&baij->Mvctx);CHKERRQ(ierr);
  ierr = VecDestroy(&baij->slvec0);CHKERRQ(ierr);
  ierr = VecDestroy(&baij->slvec0b);CHKERRQ(ierr);
  ierr = VecDestroy(&baij->slvec1);CHKERRQ(ierr);
  ierr = VecDestroy(&baij->slvec1a);CHKERRQ(ierr);
  ierr = VecDestroy(&baij->slvec1b);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&baij->sMvctx);CHKERRQ(ierr);
  ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
  ierr = PetscFree(baij->barray);CHKERRQ(ierr);
  ierr = PetscFree(baij->hd);CHKERRQ(ierr);
  ierr = VecDestroy(&baij->diag);CHKERRQ(ierr);
  ierr = VecDestroy(&baij->bb1);CHKERRQ(ierr);
  ierr = VecDestroy(&baij->xx1);CHKERRQ(ierr);
#if defined(PETSC_USE_REAL_MAT_SINGLE)
  ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr);
#endif
  ierr = PetscFree(baij->in_loc);CHKERRQ(ierr);
  ierr = PetscFree(baij->v_loc);CHKERRQ(ierr);
  ierr = PetscFree(baij->rangebs);CHKERRQ(ierr);
  ierr = PetscFree(mat->data);CHKERRQ(ierr);

  ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPISBAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpisbaij_mpisbstrm_C",NULL);CHKERRQ(ierr);
#if defined(PETSC_HAVE_ELEMENTAL)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpisbaij_elemental_C",NULL);CHKERRQ(ierr);
#endif
  PetscFunctionReturn(0);
}
#undef __FUNCT__
#define __FUNCT__ "MatMult_MPISBAIJ_Hermitian"
PetscErrorCode MatMult_MPISBAIJ_Hermitian(Mat A,Vec xx,Vec yy)
{
  Mat_MPISBAIJ      *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode    ierr;
  PetscInt          nt,mbs=a->mbs,bs=A->rmap->bs;
  PetscScalar       *from;
  const PetscScalar *x;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");

  /* diagonal part */
  ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr);
  ierr = VecSet(a->slvec1b,0.0);CHKERRQ(ierr);

  /* subdiagonal part */
  ierr = (*a->B->ops->multhermitiantranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);

  /* copy x into the vec slvec0 */
  ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
  ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);

  ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
  ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
  ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);

  ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  /* superdiagonal part */
  ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#undef __FUNCT__
#define __FUNCT__ "MatMult_MPISBAIJ"
PetscErrorCode MatMult_MPISBAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPISBAIJ      *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode    ierr;
  PetscInt          nt,mbs=a->mbs,bs=A->rmap->bs;
  PetscScalar       *from;
  const PetscScalar *x;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");

  /* diagonal part */
  ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr);
  ierr = VecSet(a->slvec1b,0.0);CHKERRQ(ierr);

  /* subdiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);

  /* copy x into the vec slvec0 */
  ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
  ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);

  ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
  ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
  ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);

  ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  /* superdiagonal part */
  ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMult_MPISBAIJ_2comm"
PetscErrorCode MatMult_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");

  ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
  if (nt != A->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and yy");

  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  /* do diagonal part */
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  /* do superdiagonal part */
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  /* do subdiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
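
/*
   A sketch of how the default MatMult above works (interpretation of the code,
   not an authoritative statement): with symmetric storage
   y = A_d*x + B*x_ghost + B^T*x_loc, where A_d is the diagonal block and B the
   off-diagonal block.  slvec0 packs [x_loc; B^T*x_loc]; a single forward
   scatter with ADD_VALUES simultaneously gathers ghost values of x and
   accumulates the B^T contributions onto their owners; the final multadd
   applies B to the ghost part:

     slvec1a  = A_d*x                  (local mult)
     slvec0   = [x_loc; B^T*x_loc]     (local pack)
     slvec1  += scatter(slvec0)        (one communication round)
     y        = B*slvec1b + slvec1a    (local multadd)
*/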
#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPISBAIJ"
PetscErrorCode MatMultAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPISBAIJ      *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode    ierr;
  PetscInt          mbs=a->mbs,bs=A->rmap->bs;
  PetscScalar       *from,zero=0.0;
  const PetscScalar *x;

  PetscFunctionBegin;
  /*
  PetscSynchronizedPrintf(PetscObjectComm((PetscObject)A)," MatMultAdd is called ...\n");
  PetscSynchronizedFlush(PetscObjectComm((PetscObject)A),PETSC_STDOUT);
  */
  /* diagonal part */
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,a->slvec1a);CHKERRQ(ierr);
  ierr = VecSet(a->slvec1b,zero);CHKERRQ(ierr);

  /* subdiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);

  /* copy x into the vec slvec0 */
  ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
  ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
  ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
  ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);

  ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

  /* superdiagonal part */
  ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPISBAIJ_2comm"
PetscErrorCode MatMultAdd_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  /* do diagonal part */
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* do superdiagonal part */
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);

  /* do subdiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonal_MPISBAIJ"
PetscErrorCode MatGetDiagonal_MPISBAIJ(Mat A,Vec v)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* if (a->rmap->N != a->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatScale_MPISBAIJ"
PetscErrorCode MatScale_MPISBAIJ(Mat A,PetscScalar aa)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatScale(a->A,aa);CHKERRQ(ierr);
  ierr = MatScale(a->B,aa);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#undef __FUNCT__
#define __FUNCT__ "MatGetRow_MPISBAIJ"
PetscErrorCode MatGetRow_MPISBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
  PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
  PetscErrorCode ierr;
  PetscInt       bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
  PetscInt       nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend;
  PetscInt       *cmap,*idx_p,cstart = mat->rstartbs;

  PetscFunctionBegin;
  if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /* allocate enough space to hold information from the longest row */
    Mat_SeqSBAIJ *Aa = (Mat_SeqSBAIJ*)mat->A->data;
    Mat_SeqBAIJ  *Ba = (Mat_SeqBAIJ*)mat->B->data;
    PetscInt     max = 1,mbs = mat->mbs,tmp;
    for (i=0; i<mbs; i++) {
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; /* row length */
      if (max < tmp) max = tmp;
    }
    ierr = PetscMalloc2(max*bs2,&mat->rowvalues,max*bs2,&mat->rowindices);CHKERRQ(ierr);
  }

  if (row < brstart || row >= brend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows");
  lrow = row - brstart; /* local row index */

  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  nztot = nzA + nzB;

  cmap = mat->garray;
  if (v || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      PetscInt imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        for (i=0; i<nzB; i++) {
          if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
        for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          for (i=0; i<imark; i++) {
            idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
          }
        } else {
          for (i=0; i<nzB; i++) {
            if (cmap[cworkB[i]/bs] < cstart) idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
            else break;
          }
          imark = i;
        }
        for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
        for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
      }
    } else {
      if (idx) *idx = 0;
      if (v)   *v   = 0;
    }
  }
  *nz  = nztot;
  ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRestoreRow_MPISBAIJ"
PetscErrorCode MatRestoreRow_MPISBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;

  PetscFunctionBegin;
  if (!baij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
  baij->getrowactive = PETSC_FALSE;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetRowUpperTriangular_MPISBAIJ"
PetscErrorCode MatGetRowUpperTriangular_MPISBAIJ(Mat A)
{
  Mat_MPISBAIJ *a  = (Mat_MPISBAIJ*)A->data;
  Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data;

  PetscFunctionBegin;
  aA->getrow_utriangular = PETSC_TRUE;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRestoreRowUpperTriangular_MPISBAIJ"
PetscErrorCode MatRestoreRowUpperTriangular_MPISBAIJ(Mat A)
{
  Mat_MPISBAIJ *a  = (Mat_MPISBAIJ*)A->data;
  Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data;

  PetscFunctionBegin;
  aA->getrow_utriangular = PETSC_FALSE;
  PetscFunctionReturn(0);
}
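
/*
   A usage sketch for the row-access routines above (row is illustrative and
   must be locally owned): because only the upper triangle is stored, a row
   traversal must be bracketed by the upper-triangular calls.

     PetscInt          nz;
     const PetscInt    *cols;
     const PetscScalar *vals;
     ierr = MatGetRowUpperTriangular(mat);CHKERRQ(ierr);
     ierr = MatGetRow(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
     ierr = MatRestoreRow(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
     ierr = MatRestoreRowUpperTriangular(mat);CHKERRQ(ierr);
*/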
#undef __FUNCT__
#define __FUNCT__ "MatRealPart_MPISBAIJ"
PetscErrorCode MatRealPart_MPISBAIJ(Mat A)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatRealPart(a->A);CHKERRQ(ierr);
  ierr = MatRealPart(a->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatImaginaryPart_MPISBAIJ"
PetscErrorCode MatImaginaryPart_MPISBAIJ(Mat A)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
  ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
   Check whether isrow is a subset of iscol_local; called by MatGetSubMatrix_MPISBAIJ().
   Input: isrow       - distributed (parallel)
          iscol_local - locally owned (sequential)
*/
#undef __FUNCT__
#define __FUNCT__ "ISEqual_private"
PetscErrorCode ISEqual_private(IS isrow,IS iscol_local,PetscBool *flg)
{
  PetscErrorCode ierr;
  PetscInt       sz1,sz2,*a1,*a2,i,j,k,nmatch;
  const PetscInt *ptr1,*ptr2;

  PetscFunctionBegin;
  ierr = ISGetLocalSize(isrow,&sz1);CHKERRQ(ierr);
  ierr = ISGetLocalSize(iscol_local,&sz2);CHKERRQ(ierr);
  if (sz1 > sz2) {
    *flg = PETSC_FALSE;
    PetscFunctionReturn(0);
  }

  ierr = ISGetIndices(isrow,&ptr1);CHKERRQ(ierr);
  ierr = ISGetIndices(iscol_local,&ptr2);CHKERRQ(ierr);

  ierr = PetscMalloc1(sz1,&a1);CHKERRQ(ierr);
  ierr = PetscMalloc1(sz2,&a2);CHKERRQ(ierr);
  ierr = PetscMemcpy(a1,ptr1,sz1*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMemcpy(a2,ptr2,sz2*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscSortInt(sz1,a1);CHKERRQ(ierr);
  ierr = PetscSortInt(sz2,a2);CHKERRQ(ierr);

  nmatch = 0;
  k      = 0;
  for (i=0; i<sz1; i++) {
    for (j=k; j<sz2; j++) {
      if (a1[i] == a2[j]) {
        k = j; nmatch++;
        break;
      }
    }
  }
  ierr = ISRestoreIndices(isrow,&ptr1);CHKERRQ(ierr);
  ierr = ISRestoreIndices(iscol_local,&ptr2);CHKERRQ(ierr);
  ierr = PetscFree(a1);CHKERRQ(ierr);
  ierr = PetscFree(a2);CHKERRQ(ierr);
  if (nmatch < sz1) {
    *flg = PETSC_FALSE;
  } else {
    *flg = PETSC_TRUE;
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetSubMatrix_MPISBAIJ"
PetscErrorCode MatGetSubMatrix_MPISBAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
{
  PetscErrorCode ierr;
  IS             iscol_local;
  PetscInt       csize;
  PetscBool      isequal;

  PetscFunctionBegin;
  ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
  if (call == MAT_REUSE_MATRIX) {
    ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
    if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
  } else {
    ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
    ierr = ISEqual_private(isrow,iscol_local,&isequal);CHKERRQ(ierr);
    if (!isequal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"For symmetric format, iscol must equal isrow");
  }

  /* now call MatGetSubMatrix_MPIBAIJ() */
  ierr = MatGetSubMatrix_MPIBAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
  if (call == MAT_INITIAL_MATRIX) {
    ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
    ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
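
/*
   A usage sketch for the routine above (the index set is illustrative): the
   extracted submatrix of a symmetric matrix must itself be symmetric, so the
   same selection must be passed for rows and columns.

     IS  is;     same index set for rows and columns
     Mat sub;
     ierr = MatGetSubMatrix(mat,is,is,MAT_INITIAL_MATRIX,&sub);CHKERRQ(ierr);
*/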
#undef __FUNCT__
#define __FUNCT__ "MatZeroEntries_MPISBAIJ"
PetscErrorCode MatZeroEntries_MPISBAIJ(Mat A)
{
  Mat_MPISBAIJ   *l = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetInfo_MPISBAIJ"
PetscErrorCode MatGetInfo_MPISBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)matin->data;
  Mat            A = a->A,B = a->B;
  PetscErrorCode ierr;
  PetscReal      isend[5],irecv[5];

  PetscFunctionBegin;
  info->block_size = (PetscReal)matin->rmap->bs;

  ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);

  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;

  ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);

  isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
  isend[3] += info->memory;  isend[4] += info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(0);
}
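
/*
   A usage sketch for the routine above: MatGetInfo() fills a MatInfo struct;
   for the global flavors the five scalar fields set above are reduced across
   the matrix's communicator.

     MatInfo info;
     ierr = MatGetInfo(mat,MAT_GLOBAL_SUM,&info);CHKERRQ(ierr);
     ierr = PetscPrintf(PETSC_COMM_WORLD,"nz used %g\n",(double)info.nz_used);CHKERRQ(ierr);
*/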
#undef __FUNCT__
#define __FUNCT__ "MatSetOption_MPISBAIJ"
PetscErrorCode MatSetOption_MPISBAIJ(Mat A,MatOption op,PetscBool flg)
{
  Mat_MPISBAIJ   *a  = (Mat_MPISBAIJ*)A->data;
  Mat_SeqSBAIJ   *aA = (Mat_SeqSBAIJ*)a->A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
  case MAT_UNUSED_NONZERO_LOCATION_ERR:
  case MAT_KEEP_NONZERO_PATTERN:
  case MAT_NEW_NONZERO_LOCATION_ERR:
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
    break;
  case MAT_ROW_ORIENTED:
    a->roworiented = flg;

    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
    break;
  case MAT_NEW_DIAGONALS:
    ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = flg;
    break;
  case MAT_USE_HASH_TABLE:
    a->ht_flag = flg;
    break;
  case MAT_HERMITIAN:
    if (!A->assembled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call MatAssemblyEnd() first");
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);

    A->ops->mult = MatMult_MPISBAIJ_Hermitian;
    break;
  case MAT_SPD:
    A->spd_set = PETSC_TRUE;
    A->spd     = flg;
    if (flg) {
      A->symmetric                  = PETSC_TRUE;
      A->structurally_symmetric     = PETSC_TRUE;
      A->symmetric_set              = PETSC_TRUE;
      A->structurally_symmetric_set = PETSC_TRUE;
    }
    break;
  case MAT_SYMMETRIC:
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  case MAT_STRUCTURALLY_SYMMETRIC:
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  case MAT_SYMMETRY_ETERNAL:
    if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Matrix must be symmetric");
    ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_LOWER_TRIANGULAR:
    aA->ignore_ltriangular = flg;
    break;
  case MAT_ERROR_LOWER_TRIANGULAR:
    aA->ignore_ltriangular = flg;
    break;
  case MAT_GETROW_UPPERTRIANGULAR:
    aA->getrow_utriangular = flg;
    break;
  default:
    SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatTranspose_MPISBAIJ"
PetscErrorCode MatTranspose_MPISBAIJ(Mat A,MatReuse reuse,Mat *B)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* the matrix is symmetric, so its transpose is a copy */
  if (reuse == MAT_INITIAL_MATRIX || *B != A) {
    ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDiagonalScale_MPISBAIJ"
PetscErrorCode MatDiagonalScale_MPISBAIJ(Mat mat,Vec ll,Vec rr)
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  Mat            a = baij->A,b = baij->B;
  PetscErrorCode ierr;
  PetscInt       nv,m,n;
  PetscBool      flg;

  PetscFunctionBegin;
  if (ll != rr) {
    ierr = VecEqual(ll,rr,&flg);CHKERRQ(ierr);
    if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be the same");
  }
  if (!ll) PetscFunctionReturn(0);

  ierr = MatGetLocalSize(mat,&m,&n);CHKERRQ(ierr);
  if (m != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"For symmetric format, local sizes %d %d must be the same",m,n);

  ierr = VecGetLocalSize(rr,&nv);CHKERRQ(ierr);
  if (nv != n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left and right vector non-conforming local size");

  ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

  /* left diagonal-scale the off-diagonal part */
  ierr = (*b->ops->diagonalscale)(b,ll,NULL);CHKERRQ(ierr);

  /* scale the diagonal part */
  ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);

  /* right diagonal-scale the off-diagonal part */
  ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*b->ops->diagonalscale)(b,NULL,baij->lvec);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
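
/*
   A usage sketch for the routine above (d is illustrative): the symmetric
   format only supports the symmetric scaling D*A*D, so the same vector must be
   passed on both sides.

     Vec d;   diagonal scaling vector, layout matching the matrix rows
     ierr = MatDiagonalScale(mat,d,d);CHKERRQ(ierr);
*/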
PetscFunctionBegin; 1612 a = matA->A; b = matA->B; 1613 c = matB->A; d = matB->B; 1614 1615 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1616 if (flg) { 1617 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1618 } 1619 ierr = MPI_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 1620 PetscFunctionReturn(0); 1621 } 1622 1623 #undef __FUNCT__ 1624 #define __FUNCT__ "MatCopy_MPISBAIJ" 1625 PetscErrorCode MatCopy_MPISBAIJ(Mat A,Mat B,MatStructure str) 1626 { 1627 PetscErrorCode ierr; 1628 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1629 Mat_MPISBAIJ *b = (Mat_MPISBAIJ*)B->data; 1630 1631 PetscFunctionBegin; 1632 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 1633 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 1634 ierr = MatGetRowUpperTriangular(A);CHKERRQ(ierr); 1635 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1636 ierr = MatRestoreRowUpperTriangular(A);CHKERRQ(ierr); 1637 } else { 1638 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 1639 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 1640 } 1641 PetscFunctionReturn(0); 1642 } 1643 1644 #undef __FUNCT__ 1645 #define __FUNCT__ "MatSetUp_MPISBAIJ" 1646 PetscErrorCode MatSetUp_MPISBAIJ(Mat A) 1647 { 1648 PetscErrorCode ierr; 1649 1650 PetscFunctionBegin; 1651 ierr = MatMPISBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1652 PetscFunctionReturn(0); 1653 } 1654 1655 #undef __FUNCT__ 1656 #define __FUNCT__ "MatAXPY_MPISBAIJ" 1657 PetscErrorCode MatAXPY_MPISBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 1658 { 1659 PetscErrorCode ierr; 1660 Mat_MPISBAIJ *xx=(Mat_MPISBAIJ*)X->data,*yy=(Mat_MPISBAIJ*)Y->data; 1661 PetscBLASInt bnz,one=1; 1662 Mat_SeqSBAIJ *xa,*ya; 1663 Mat_SeqBAIJ *xb,*yb; 1664 1665 PetscFunctionBegin; 1666 if (str == SAME_NONZERO_PATTERN) { 1667 PetscScalar alpha = a; 1668 xa = (Mat_SeqSBAIJ*)xx->A->data; 1669 ya = (Mat_SeqSBAIJ*)yy->A->data; 1670 ierr = PetscBLASIntCast(xa->nz,&bnz);CHKERRQ(ierr); 1671 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,xa->a,&one,ya->a,&one)); 1672 xb = (Mat_SeqBAIJ*)xx->B->data; 1673 yb = (Mat_SeqBAIJ*)yy->B->data; 1674 ierr = PetscBLASIntCast(xb->nz,&bnz);CHKERRQ(ierr); 1675 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,xb->a,&one,yb->a,&one)); 1676 ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr); 1677 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 1678 ierr = MatSetOption(X,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE);CHKERRQ(ierr); 1679 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 1680 ierr = MatSetOption(X,MAT_GETROW_UPPERTRIANGULAR,PETSC_FALSE);CHKERRQ(ierr); 1681 } else { 1682 Mat B; 1683 PetscInt *nnz_d,*nnz_o,bs=Y->rmap->bs; 1684 if (bs != X->rmap->bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrices must have same block size"); 1685 ierr = MatGetRowUpperTriangular(X);CHKERRQ(ierr); 1686 ierr = MatGetRowUpperTriangular(Y);CHKERRQ(ierr); 1687 ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr); 1688 ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr); 1689 ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr); 1690 ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 1691 ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr); 1692 ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr); 1693 ierr = MatSetType(B,MATMPISBAIJ);CHKERRQ(ierr); 1694 ierr = MatAXPYGetPreallocation_SeqSBAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr); 1695 ierr 
= MatAXPYGetPreallocation_MPIBAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr); 1696 ierr = MatMPISBAIJSetPreallocation(B,bs,0,nnz_d,0,nnz_o);CHKERRQ(ierr); 1697 ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 1698 ierr = MatHeaderReplace(Y,B);CHKERRQ(ierr); 1699 ierr = PetscFree(nnz_d);CHKERRQ(ierr); 1700 ierr = PetscFree(nnz_o);CHKERRQ(ierr); 1701 ierr = MatRestoreRowUpperTriangular(X);CHKERRQ(ierr); 1702 ierr = MatRestoreRowUpperTriangular(Y);CHKERRQ(ierr); 1703 } 1704 PetscFunctionReturn(0); 1705 } 1706 1707 #undef __FUNCT__ 1708 #define __FUNCT__ "MatGetSubMatrices_MPISBAIJ" 1709 PetscErrorCode MatGetSubMatrices_MPISBAIJ(Mat A,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat *B[]) 1710 { 1711 PetscErrorCode ierr; 1712 PetscInt i; 1713 PetscBool flg,sorted; 1714 1715 PetscFunctionBegin; 1716 for (i = 0; i < n; i++) { 1717 ierr = ISSorted(irow[i],&sorted);CHKERRQ(ierr); 1718 if (!sorted) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row index set %d not sorted",i); 1719 ierr = ISSorted(icol[i],&sorted);CHKERRQ(ierr); 1720 if (!sorted) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Column index set %d not sorted",i); 1721 } 1722 ierr = MatGetSubMatrices_MPIBAIJ(A,n,irow,icol,scall,B);CHKERRQ(ierr); 1723 for (i=0; i<n; i++) { 1724 ierr = ISEqual(irow[i],icol[i],&flg);CHKERRQ(ierr); 1725 if (!flg) { /* *B[i] is non-symmetric, set flag */ 1726 ierr = MatSetOption(*B[i],MAT_SYMMETRIC,PETSC_FALSE);CHKERRQ(ierr); 1727 } 1728 } 1729 PetscFunctionReturn(0); 1730 } 1731 1732 #undef __FUNCT__ 1733 #define __FUNCT__ "MatShift_MPISBAIJ" 1734 PetscErrorCode MatShift_MPISBAIJ(Mat Y,PetscScalar a) 1735 { 1736 PetscErrorCode ierr; 1737 Mat_MPISBAIJ *maij = (Mat_MPISBAIJ*)Y->data; 1738 Mat_SeqSBAIJ *aij = (Mat_SeqSBAIJ*)maij->A->data; 1739 1740 PetscFunctionBegin; 1741 if (!Y->preallocated) { 1742 ierr = MatMPISBAIJSetPreallocation(Y,Y->rmap->bs,1,NULL,0,NULL);CHKERRQ(ierr); 1743 } else if (!aij->nz) { 1744 PetscInt nonew = aij->nonew; 1745 ierr = MatSeqSBAIJSetPreallocation(maij->A,Y->rmap->bs,1,NULL);CHKERRQ(ierr); 1746 aij->nonew = nonew; 1747 } 1748 ierr = MatShift_Basic(Y,a);CHKERRQ(ierr); 1749 PetscFunctionReturn(0); 1750 } 1751 1752 /* -------------------------------------------------------------------*/ 1753 static struct _MatOps MatOps_Values = {MatSetValues_MPISBAIJ, 1754 MatGetRow_MPISBAIJ, 1755 MatRestoreRow_MPISBAIJ, 1756 MatMult_MPISBAIJ, 1757 /* 4*/ MatMultAdd_MPISBAIJ, 1758 MatMult_MPISBAIJ, /* transpose versions are same as non-transpose */ 1759 MatMultAdd_MPISBAIJ, 1760 0, 1761 0, 1762 0, 1763 /* 10*/ 0, 1764 0, 1765 0, 1766 MatSOR_MPISBAIJ, 1767 MatTranspose_MPISBAIJ, 1768 /* 15*/ MatGetInfo_MPISBAIJ, 1769 MatEqual_MPISBAIJ, 1770 MatGetDiagonal_MPISBAIJ, 1771 MatDiagonalScale_MPISBAIJ, 1772 MatNorm_MPISBAIJ, 1773 /* 20*/ MatAssemblyBegin_MPISBAIJ, 1774 MatAssemblyEnd_MPISBAIJ, 1775 MatSetOption_MPISBAIJ, 1776 MatZeroEntries_MPISBAIJ, 1777 /* 24*/ 0, 1778 0, 1779 0, 1780 0, 1781 0, 1782 /* 29*/ MatSetUp_MPISBAIJ, 1783 0, 1784 0, 1785 0, 1786 0, 1787 /* 34*/ MatDuplicate_MPISBAIJ, 1788 0, 1789 0, 1790 0, 1791 0, 1792 /* 39*/ MatAXPY_MPISBAIJ, 1793 MatGetSubMatrices_MPISBAIJ, 1794 MatIncreaseOverlap_MPISBAIJ, 1795 MatGetValues_MPISBAIJ, 1796 MatCopy_MPISBAIJ, 1797 /* 44*/ 0, 1798 MatScale_MPISBAIJ, 1799 MatShift_MPISBAIJ, 1800 0, 1801 0, 1802 /* 49*/ 0, 1803 0, 1804 0, 1805 0, 1806 0, 1807 /* 54*/ 0, 1808 0, 1809 MatSetUnfactored_MPISBAIJ, 1810 0, 1811 MatSetValuesBlocked_MPISBAIJ, 1812 /* 59*/ MatGetSubMatrix_MPISBAIJ, 1813 0, 1814 0, 
        0,
        0,
/* 64*/ 0,
        0,
        0,
        0,
        0,
/* 69*/ MatGetRowMaxAbs_MPISBAIJ,
        0,
        0,
        0,
        0,
/* 74*/ 0,
        0,
        0,
        0,
        0,
/* 79*/ 0,
        0,
        0,
        0,
        MatLoad_MPISBAIJ,
/* 84*/ 0,
        0,
        0,
        0,
        0,
/* 89*/ 0,
        0,
        0,
        0,
        0,
/* 94*/ 0,
        0,
        0,
        0,
        0,
/* 99*/ 0,
        0,
        0,
        0,
        0,
/*104*/ 0,
        MatRealPart_MPISBAIJ,
        MatImaginaryPart_MPISBAIJ,
        MatGetRowUpperTriangular_MPISBAIJ,
        MatRestoreRowUpperTriangular_MPISBAIJ,
/*109*/ 0,
        0,
        0,
        0,
        0,
/*114*/ 0,
        0,
        0,
        0,
        0,
/*119*/ 0,
        0,
        0,
        0,
        0,
/*124*/ 0,
        0,
        0,
        0,
        0,
/*129*/ 0,
        0,
        0,
        0,
        0,
/*134*/ 0,
        0,
        0,
        0,
        0,
/*139*/ 0,
        0,
        0,
        0,
        0,
/*144*/ MatCreateMPIMatConcatenateSeqMat_MPISBAIJ
};

#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonalBlock_MPISBAIJ"
PetscErrorCode MatGetDiagonalBlock_MPISBAIJ(Mat A,Mat *a)
{
  PetscFunctionBegin;
  *a = ((Mat_MPISBAIJ*)A->data)->A;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMPISBAIJSetPreallocation_MPISBAIJ"
PetscErrorCode MatMPISBAIJSetPreallocation_MPISBAIJ(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt *d_nnz,PetscInt o_nz,const PetscInt *o_nnz)
{
  Mat_MPISBAIJ   *b;
  PetscErrorCode ierr;
  PetscInt       i,mbs,Mbs;

  PetscFunctionBegin;
  ierr = MatSetBlockSize(B,PetscAbs(bs));CHKERRQ(ierr);
  ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
  ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);

  b   = (Mat_MPISBAIJ*)B->data;
  mbs = B->rmap->n/bs;
  Mbs = B->rmap->N/bs;
  if (mbs*bs != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"No of local rows %D must be divisible by blocksize %D",B->rmap->n,bs);

  B->rmap->bs = bs;
  b->bs2      = bs*bs;
  b->mbs      = mbs;
  b->Mbs      = Mbs;
  b->nbs      = B->cmap->n/bs;
  b->Nbs      = B->cmap->N/bs;

  for (i=0; i<=b->size; i++) {
    b->rangebs[i] = B->rmap->range[i]/bs;
  }
  b->rstartbs = B->rmap->rstart/bs;
  b->rendbs   = B->rmap->rend/bs;

  b->cstartbs = B->cmap->rstart/bs;
  b->cendbs   = B->cmap->rend/bs;

  if (!B->preallocated) {
    ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
    ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
    ierr = MatSetType(b->A,MATSEQSBAIJ);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr);
    ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
    ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
    ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr);
    ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),bs,&B->bstash);CHKERRQ(ierr);
  }

  ierr = MatSeqSBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr);
  ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr);

  B->preallocated = PETSC_TRUE;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMPISBAIJSetPreallocationCSR_MPISBAIJ"
PetscErrorCode MatMPISBAIJSetPreallocationCSR_MPISBAIJ(Mat B,PetscInt
bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[]) 1966 { 1967 PetscInt m,rstart,cstart,cend; 1968 PetscInt i,j,d,nz,nz_max=0,*d_nnz=0,*o_nnz=0; 1969 const PetscInt *JJ =0; 1970 PetscScalar *values=0; 1971 PetscErrorCode ierr; 1972 1973 PetscFunctionBegin; 1974 if (bs < 1) SETERRQ1(PetscObjectComm((PetscObject)B),PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs); 1975 ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 1976 ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 1977 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 1978 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 1979 ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr); 1980 m = B->rmap->n/bs; 1981 rstart = B->rmap->rstart/bs; 1982 cstart = B->cmap->rstart/bs; 1983 cend = B->cmap->rend/bs; 1984 1985 if (ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %D",ii[0]); 1986 ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr); 1987 for (i=0; i<m; i++) { 1988 nz = ii[i+1] - ii[i]; 1989 if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nz); 1990 nz_max = PetscMax(nz_max,nz); 1991 JJ = jj + ii[i]; 1992 for (j=0; j<nz; j++) { 1993 if (*JJ >= cstart) break; 1994 JJ++; 1995 } 1996 d = 0; 1997 for (; j<nz; j++) { 1998 if (*JJ++ >= cend) break; 1999 d++; 2000 } 2001 d_nnz[i] = d; 2002 o_nnz[i] = nz - d; 2003 } 2004 ierr = MatMPISBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 2005 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 2006 2007 values = (PetscScalar*)V; 2008 if (!values) { 2009 ierr = PetscMalloc1(bs*bs*nz_max,&values);CHKERRQ(ierr); 2010 ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr); 2011 } 2012 for (i=0; i<m; i++) { 2013 PetscInt row = i + rstart; 2014 PetscInt ncols = ii[i+1] - ii[i]; 2015 const PetscInt *icols = jj + ii[i]; 2016 const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0); 2017 ierr = MatSetValuesBlocked_MPISBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr); 2018 } 2019 2020 if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); } 2021 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2022 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2023 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 2024 PetscFunctionReturn(0); 2025 } 2026 2027 /*MC 2028 MATMPISBAIJ - MATMPISBAIJ = "mpisbaij" - A matrix type to be used for distributed symmetric sparse block matrices, 2029 based on block compressed sparse row format. Only the upper triangular portion of the "diagonal" portion of 2030 the matrix is stored. 2031 2032 For complex numbers by default this matrix is symmetric, NOT Hermitian symmetric. To make it Hermitian symmetric you 2033 can call MatSetOption(Mat, MAT_HERMITIAN); 2034 2035 Options Database Keys: 2036 . 
-mat_type mpisbaij - sets the matrix type to "mpisbaij" during a call to MatSetFromOptions() 2037 2038 Level: beginner 2039 2040 .seealso: MatCreateMPISBAIJ 2041 M*/ 2042 2043 PETSC_EXTERN PetscErrorCode MatConvert_MPISBAIJ_MPISBSTRM(Mat,MatType,MatReuse,Mat*); 2044 2045 #undef __FUNCT__ 2046 #define __FUNCT__ "MatCreate_MPISBAIJ" 2047 PETSC_EXTERN PetscErrorCode MatCreate_MPISBAIJ(Mat B) 2048 { 2049 Mat_MPISBAIJ *b; 2050 PetscErrorCode ierr; 2051 PetscBool flg = PETSC_FALSE; 2052 2053 PetscFunctionBegin; 2054 ierr = PetscNewLog(B,&b);CHKERRQ(ierr); 2055 B->data = (void*)b; 2056 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 2057 2058 B->ops->destroy = MatDestroy_MPISBAIJ; 2059 B->ops->view = MatView_MPISBAIJ; 2060 B->assembled = PETSC_FALSE; 2061 B->insertmode = NOT_SET_VALUES; 2062 2063 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr); 2064 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&b->size);CHKERRQ(ierr); 2065 2066 /* build local table of row and column ownerships */ 2067 ierr = PetscMalloc1(b->size+2,&b->rangebs);CHKERRQ(ierr); 2068 2069 /* build cache for off array entries formed */ 2070 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr); 2071 2072 b->donotstash = PETSC_FALSE; 2073 b->colmap = NULL; 2074 b->garray = NULL; 2075 b->roworiented = PETSC_TRUE; 2076 2077 /* stuff used in block assembly */ 2078 b->barray = 0; 2079 2080 /* stuff used for matrix vector multiply */ 2081 b->lvec = 0; 2082 b->Mvctx = 0; 2083 b->slvec0 = 0; 2084 b->slvec0b = 0; 2085 b->slvec1 = 0; 2086 b->slvec1a = 0; 2087 b->slvec1b = 0; 2088 b->sMvctx = 0; 2089 2090 /* stuff for MatGetRow() */ 2091 b->rowindices = 0; 2092 b->rowvalues = 0; 2093 b->getrowactive = PETSC_FALSE; 2094 2095 /* hash table stuff */ 2096 b->ht = 0; 2097 b->hd = 0; 2098 b->ht_size = 0; 2099 b->ht_flag = PETSC_FALSE; 2100 b->ht_fact = 0; 2101 b->ht_total_ct = 0; 2102 b->ht_insert_ct = 0; 2103 2104 /* stuff for MatGetSubMatrices_MPIBAIJ_local() */ 2105 b->ijonly = PETSC_FALSE; 2106 2107 b->in_loc = 0; 2108 b->v_loc = 0; 2109 b->n_loc = 0; 2110 2111 ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPISBAIJ);CHKERRQ(ierr); 2112 ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPISBAIJ);CHKERRQ(ierr); 2113 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPISBAIJ);CHKERRQ(ierr); 2114 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPISBAIJSetPreallocation_C",MatMPISBAIJSetPreallocation_MPISBAIJ);CHKERRQ(ierr); 2115 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPISBAIJSetPreallocationCSR_C",MatMPISBAIJSetPreallocationCSR_MPISBAIJ);CHKERRQ(ierr); 2116 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpisbaij_mpisbstrm_C",MatConvert_MPISBAIJ_MPISBSTRM);CHKERRQ(ierr); 2117 #if defined(PETSC_HAVE_ELEMENTAL) 2118 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpisbaij_elemental_C",MatConvert_MPISBAIJ_Elemental);CHKERRQ(ierr); 2119 #endif 2120 2121 B->symmetric = PETSC_TRUE; 2122 B->structurally_symmetric = PETSC_TRUE; 2123 B->symmetric_set = PETSC_TRUE; 2124 B->structurally_symmetric_set = PETSC_TRUE; 2125 2126 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPISBAIJ);CHKERRQ(ierr); 2127 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Options for loading MPISBAIJ matrix 1","Mat");CHKERRQ(ierr); 2128 ierr = PetscOptionsBool("-mat_use_hash_table","Use hash table to save 
memory in constructing matrix","MatSetOption",flg,&flg,NULL);CHKERRQ(ierr);
  if (flg) {
    PetscReal fact = 1.39;
    ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr);
    ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,NULL);CHKERRQ(ierr);
    if (fact <= 1.0) fact = 1.39;
    ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
    ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr);
  }
  ierr = PetscOptionsEnd();CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*MC
   MATSBAIJ - MATSBAIJ = "sbaij" - A matrix type to be used for symmetric block sparse matrices.

   This matrix type is identical to MATSEQSBAIJ when constructed with a single process communicator,
   and MATMPISBAIJ otherwise.

   Options Database Keys:
. -mat_type sbaij - sets the matrix type to "sbaij" during a call to MatSetFromOptions()

   Level: beginner

.seealso: MatCreateMPISBAIJ,MATSEQSBAIJ,MATMPISBAIJ
M*/

#undef __FUNCT__
#define __FUNCT__ "MatMPISBAIJSetPreallocation"
/*@C
   MatMPISBAIJSetPreallocation - For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective on Mat

   Input Parameters:
+  B - the matrix
.  bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
       blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
.  d_nz - number of block nonzeros per block row in the diagonal portion of the local
          submatrix (same for all local rows)
.  d_nnz - array containing the number of block nonzeros in the various block rows
           in the upper triangular and diagonal part of the diagonal portion of the local
           submatrix (possibly different for each block row) or NULL. If you plan to factor the matrix you must leave room
           for the diagonal entry and set a value even if it is zero.
.  o_nz - number of block nonzeros per block row in the off-diagonal portion of the local
          submatrix (same for all local rows).
-  o_nnz - array containing the number of block nonzeros in the various block rows of the
           off-diagonal portion of the local submatrix that is right of the diagonal
           (possibly different for each block row) or NULL.

   Options Database Keys:
.   -mat_no_unroll - uses code that does not unroll the loops in the
                     block calculations (much slower)
.   -mat_block_size - size of the blocks to use

   Notes:

   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
   then it must be used on all processors that share the object for that argument.

   If the *_nnz parameter is given then the *_nz parameter is ignored

   Storage Information:
   For a square global matrix we define each processor's diagonal portion
   to be its local rows and the corresponding columns (a square submatrix);
   each processor's off-diagonal portion encompasses the remainder of the
   local matrix (a rectangular submatrix).
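   For example, a typical assembly sequence looks like the following (a minimal
   sketch; M, bs and the block-nonzero counts 3 and 1 are illustrative
   placeholders, not recommended values):
.vb
   Mat A;
   MatCreate(PETSC_COMM_WORLD,&A);
   MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,M);
   MatSetType(A,MATMPISBAIJ);
   MatMPISBAIJSetPreallocation(A,bs,3,NULL,1,NULL);
.ve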
2199 2200 The user can specify preallocated storage for the diagonal part of 2201 the local submatrix with either d_nz or d_nnz (not both). Set 2202 d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic 2203 memory allocation. Likewise, specify preallocated storage for the 2204 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 2205 2206 You can call MatGetInfo() to get information on how effective the preallocation was; 2207 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 2208 You can also run with the option -info and look for messages with the string 2209 malloc in them to see if additional memory allocation was needed. 2210 2211 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 2212 the figure below we depict these three local rows and all columns (0-11). 2213 2214 .vb 2215 0 1 2 3 4 5 6 7 8 9 10 11 2216 -------------------------- 2217 row 3 |. . . d d d o o o o o o 2218 row 4 |. . . d d d o o o o o o 2219 row 5 |. . . d d d o o o o o o 2220 -------------------------- 2221 .ve 2222 2223 Thus, any entries in the d locations are stored in the d (diagonal) 2224 submatrix, and any entries in the o locations are stored in the 2225 o (off-diagonal) submatrix. Note that the d matrix is stored in 2226 MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format. 2227 2228 Now d_nz should indicate the number of block nonzeros per row in the upper triangular 2229 plus the diagonal part of the d matrix, 2230 and o_nz should indicate the number of block nonzeros per row in the o matrix 2231 2232 In general, for PDE problems in which most nonzeros are near the diagonal, 2233 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 2234 or you will get TERRIBLE performance; see the users' manual chapter on 2235 matrices. 2236 2237 Level: intermediate 2238 2239 .keywords: matrix, block, aij, compressed row, sparse, parallel 2240 2241 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateBAIJ(), PetscSplitOwnership() 2242 @*/ 2243 PetscErrorCode MatMPISBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 2244 { 2245 PetscErrorCode ierr; 2246 2247 PetscFunctionBegin; 2248 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 2249 PetscValidType(B,1); 2250 PetscValidLogicalCollectiveInt(B,bs,2); 2251 ierr = PetscTryMethod(B,"MatMPISBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,bs,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr); 2252 PetscFunctionReturn(0); 2253 } 2254 2255 #undef __FUNCT__ 2256 #define __FUNCT__ "MatCreateSBAIJ" 2257 /*@C 2258 MatCreateSBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format 2259 (block compressed row). For good matrix assembly performance 2260 the user should preallocate the matrix storage by setting the parameters 2261 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2262 performance can be increased by more than a factor of 50. 2263 2264 Collective on MPI_Comm 2265 2266 Input Parameters: 2267 + comm - MPI communicator 2268 . bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row 2269 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs() 2270 . 
m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
       This value should be the same as the local size used in creating the
       y vector for the matrix-vector product y = Ax.
.  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
       This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax.
.  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.  d_nz - number of block nonzeros per block row in the diagonal portion of the local
          submatrix (same for all local rows)
.  d_nnz - array containing the number of block nonzeros in the various block rows
           in the upper triangular portion of the diagonal portion of the local submatrix
           (possibly different for each block row) or NULL.
           If you plan to factor the matrix you must leave room for the diagonal entry and
           set its value even if it is zero.
.  o_nz - number of block nonzeros per block row in the off-diagonal portion of the local
          submatrix (same for all local rows).
-  o_nnz - array containing the number of nonzeros in the various block rows of the
           off-diagonal portion of the local submatrix (possibly different for
           each block row) or NULL.

   Output Parameter:
.  A - the matrix

   Options Database Keys:
.   -mat_no_unroll - uses code that does not unroll the loops in the
                     block calculations (much slower)
.   -mat_block_size - size of the blocks to use
.   -mat_mpi - use the parallel matrix data structures even on one processor
               (defaults to using SeqSBAIJ format on one processor)

   It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
   MatXXXXSetPreallocation() paradigm instead of this routine directly.
   [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]

   Notes:
   The number of rows and columns must be divisible by blocksize.
   This matrix type does not support complex Hermitian operations.

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
   then it must be used on all processors that share the object for that argument.

   If the *_nnz parameter is given then the *_nz parameter is ignored

   Storage Information:
   For a square global matrix we define each processor's diagonal portion
   to be its local rows and the corresponding columns (a square submatrix);
   each processor's off-diagonal portion encompasses the remainder of the
   local matrix (a rectangular submatrix).

   The user can specify preallocated storage for the diagonal part of
   the local submatrix with either d_nz or d_nnz (not both).  Set
   d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
   memory allocation.  Likewise, specify preallocated storage for the
   off-diagonal part of the local submatrix with o_nz or o_nnz (not both).

   Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
   the figure below we depict these three local rows and all columns (0-11).

.vb
           0 1 2 3 4 5 6 7 8 9 10 11
          --------------------------
   row 3  |. . . d d d o o o o o o
   row 4  |. . . d d d o o o o o o
   row 5  |. . .
d d d o o o o o o 2338 -------------------------- 2339 .ve 2340 2341 Thus, any entries in the d locations are stored in the d (diagonal) 2342 submatrix, and any entries in the o locations are stored in the 2343 o (off-diagonal) submatrix. Note that the d matrix is stored in 2344 MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format. 2345 2346 Now d_nz should indicate the number of block nonzeros per row in the upper triangular 2347 plus the diagonal part of the d matrix, 2348 and o_nz should indicate the number of block nonzeros per row in the o matrix. 2349 In general, for PDE problems in which most nonzeros are near the diagonal, 2350 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 2351 or you will get TERRIBLE performance; see the users' manual chapter on 2352 matrices. 2353 2354 Level: intermediate 2355 2356 .keywords: matrix, block, aij, compressed row, sparse, parallel 2357 2358 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateBAIJ() 2359 @*/ 2360 2361 PetscErrorCode MatCreateSBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 2362 { 2363 PetscErrorCode ierr; 2364 PetscMPIInt size; 2365 2366 PetscFunctionBegin; 2367 ierr = MatCreate(comm,A);CHKERRQ(ierr); 2368 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 2369 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2370 if (size > 1) { 2371 ierr = MatSetType(*A,MATMPISBAIJ);CHKERRQ(ierr); 2372 ierr = MatMPISBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 2373 } else { 2374 ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr); 2375 ierr = MatSeqSBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr); 2376 } 2377 PetscFunctionReturn(0); 2378 } 2379 2380 2381 #undef __FUNCT__ 2382 #define __FUNCT__ "MatDuplicate_MPISBAIJ" 2383 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 2384 { 2385 Mat mat; 2386 Mat_MPISBAIJ *a,*oldmat = (Mat_MPISBAIJ*)matin->data; 2387 PetscErrorCode ierr; 2388 PetscInt len=0,nt,bs=matin->rmap->bs,mbs=oldmat->mbs; 2389 PetscScalar *array; 2390 2391 PetscFunctionBegin; 2392 *newmat = 0; 2393 2394 ierr = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr); 2395 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 2396 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 2397 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 2398 ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr); 2399 ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr); 2400 2401 mat->factortype = matin->factortype; 2402 mat->preallocated = PETSC_TRUE; 2403 mat->assembled = PETSC_TRUE; 2404 mat->insertmode = NOT_SET_VALUES; 2405 2406 a = (Mat_MPISBAIJ*)mat->data; 2407 a->bs2 = oldmat->bs2; 2408 a->mbs = oldmat->mbs; 2409 a->nbs = oldmat->nbs; 2410 a->Mbs = oldmat->Mbs; 2411 a->Nbs = oldmat->Nbs; 2412 2413 2414 a->size = oldmat->size; 2415 a->rank = oldmat->rank; 2416 a->donotstash = oldmat->donotstash; 2417 a->roworiented = oldmat->roworiented; 2418 a->rowindices = 0; 2419 a->rowvalues = 0; 2420 a->getrowactive = PETSC_FALSE; 2421 a->barray = 0; 2422 a->rstartbs = oldmat->rstartbs; 2423 a->rendbs = oldmat->rendbs; 2424 a->cstartbs = oldmat->cstartbs; 2425 a->cendbs = oldmat->cendbs; 2426 2427 /* hash table stuff */ 2428 a->ht = 0; 2429 a->hd = 0; 2430 a->ht_size = 0; 2431 a->ht_flag = oldmat->ht_flag; 2432 a->ht_fact 
= oldmat->ht_fact; 2433 a->ht_total_ct = 0; 2434 a->ht_insert_ct = 0; 2435 2436 ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+2)*sizeof(PetscInt));CHKERRQ(ierr); 2437 if (oldmat->colmap) { 2438 #if defined(PETSC_USE_CTABLE) 2439 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 2440 #else 2441 ierr = PetscMalloc1(a->Nbs,&a->colmap);CHKERRQ(ierr); 2442 ierr = PetscLogObjectMemory((PetscObject)mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr); 2443 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr); 2444 #endif 2445 } else a->colmap = 0; 2446 2447 if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) { 2448 ierr = PetscMalloc1(len,&a->garray);CHKERRQ(ierr); 2449 ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr); 2450 ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); 2451 } else a->garray = 0; 2452 2453 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)matin),matin->rmap->bs,&mat->bstash);CHKERRQ(ierr); 2454 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 2455 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr); 2456 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 2457 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr); 2458 2459 ierr = VecDuplicate(oldmat->slvec0,&a->slvec0);CHKERRQ(ierr); 2460 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0);CHKERRQ(ierr); 2461 ierr = VecDuplicate(oldmat->slvec1,&a->slvec1);CHKERRQ(ierr); 2462 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1);CHKERRQ(ierr); 2463 2464 ierr = VecGetLocalSize(a->slvec1,&nt);CHKERRQ(ierr); 2465 ierr = VecGetArray(a->slvec1,&array);CHKERRQ(ierr); 2466 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,bs*mbs,array,&a->slvec1a);CHKERRQ(ierr); 2467 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,nt-bs*mbs,array+bs*mbs,&a->slvec1b);CHKERRQ(ierr); 2468 ierr = VecRestoreArray(a->slvec1,&array);CHKERRQ(ierr); 2469 ierr = VecGetArray(a->slvec0,&array);CHKERRQ(ierr); 2470 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,nt-bs*mbs,array+bs*mbs,&a->slvec0b);CHKERRQ(ierr); 2471 ierr = VecRestoreArray(a->slvec0,&array);CHKERRQ(ierr); 2472 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0);CHKERRQ(ierr); 2473 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1);CHKERRQ(ierr); 2474 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0b);CHKERRQ(ierr); 2475 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1a);CHKERRQ(ierr); 2476 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1b);CHKERRQ(ierr); 2477 2478 /* ierr = VecScatterCopy(oldmat->sMvctx,&a->sMvctx); - not written yet, replaced by the lazy trick: */ 2479 ierr = PetscObjectReference((PetscObject)oldmat->sMvctx);CHKERRQ(ierr); 2480 a->sMvctx = oldmat->sMvctx; 2481 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->sMvctx);CHKERRQ(ierr); 2482 2483 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 2484 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr); 2485 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 2486 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr); 2487 ierr = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 2488 *newmat = mat; 2489 PetscFunctionReturn(0); 2490 } 2491 2492 #undef __FUNCT__ 2493 #define 
__FUNCT__ "MatLoad_MPISBAIJ" 2494 PetscErrorCode MatLoad_MPISBAIJ(Mat newmat,PetscViewer viewer) 2495 { 2496 PetscErrorCode ierr; 2497 PetscInt i,nz,j,rstart,rend; 2498 PetscScalar *vals,*buf; 2499 MPI_Comm comm; 2500 MPI_Status status; 2501 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag,*sndcounts = 0,*browners,maxnz,*rowners,mmbs; 2502 PetscInt header[4],*rowlengths = 0,M,N,m,*cols,*locrowlens; 2503 PetscInt *procsnz = 0,jj,*mycols,*ibuf; 2504 PetscInt bs = newmat->rmap->bs,Mbs,mbs,extra_rows; 2505 PetscInt *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount; 2506 PetscInt dcount,kmax,k,nzcount,tmp; 2507 int fd; 2508 2509 PetscFunctionBegin; 2510 /* force binary viewer to load .info file if it has not yet done so */ 2511 ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr); 2512 ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr); 2513 ierr = PetscOptionsBegin(comm,NULL,"Options for loading MPISBAIJ matrix 2","Mat");CHKERRQ(ierr); 2514 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr); 2515 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2516 if (bs < 0) bs = 1; 2517 2518 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2519 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2520 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2521 if (!rank) { 2522 ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr); 2523 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 2524 if (header[3] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as MPISBAIJ"); 2525 } 2526 2527 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 2528 M = header[1]; 2529 N = header[2]; 2530 2531 /* If global sizes are set, check if they are consistent with that given in the file */ 2532 if (newmat->rmap->N >= 0 && newmat->rmap->N != M) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%D) and input matrix has (%D)",newmat->rmap->N,M); 2533 if (newmat->cmap->N >= 0 && newmat->cmap->N != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%D) and input matrix has (%D)",newmat->cmap->N,N); 2534 2535 if (M != N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only do square matrices"); 2536 2537 /* 2538 This code adds extra rows to make sure the number of rows is 2539 divisible by the blocksize 2540 */ 2541 Mbs = M/bs; 2542 extra_rows = bs - M + bs*(Mbs); 2543 if (extra_rows == bs) extra_rows = 0; 2544 else Mbs++; 2545 if (extra_rows &&!rank) { 2546 ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr); 2547 } 2548 2549 /* determine ownership of all rows */ 2550 if (newmat->rmap->n < 0) { /* PETSC_DECIDE */ 2551 mbs = Mbs/size + ((Mbs % size) > rank); 2552 m = mbs*bs; 2553 } else { /* User Set */ 2554 m = newmat->rmap->n; 2555 mbs = m/bs; 2556 } 2557 ierr = PetscMalloc2(size+1,&rowners,size+1,&browners);CHKERRQ(ierr); 2558 ierr = PetscMPIIntCast(mbs,&mmbs);CHKERRQ(ierr); 2559 ierr = MPI_Allgather(&mmbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 2560 rowners[0] = 0; 2561 for (i=2; i<=size; i++) rowners[i] += rowners[i-1]; 2562 for (i=0; i<=size; i++) browners[i] = rowners[i]*bs; 2563 rstart = rowners[rank]; 2564 rend = rowners[rank+1]; 2565 2566 /* distribute row lengths to all processors */ 2567 ierr = PetscMalloc1((rend-rstart)*bs,&locrowlens);CHKERRQ(ierr); 2568 if (!rank) { 
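    /* process 0 reads the row lengths for every row of the matrix (padding the extra
       rows with length 1) and scatters to each process the lengths of the rows it owns */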
    ierr = PetscMalloc1(M+extra_rows,&rowlengths);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
    for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
    ierr = PetscMalloc1(size,&sndcounts);CHKERRQ(ierr);
    for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i];
    ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr);
    ierr = PetscFree(sndcounts);CHKERRQ(ierr);
  } else {
    ierr = MPI_Scatterv(0,0,0,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr);
  }

  if (!rank) {   /* procs[0] */
    /* calculate the number of nonzeros on each processor */
    ierr = PetscMalloc1(size,&procsnz);CHKERRQ(ierr);
    ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
    for (i=0; i<size; i++) {
      for (j=rowners[i]*bs; j<rowners[i+1]*bs; j++) {
        procsnz[i] += rowlengths[j];
      }
    }
    ierr = PetscFree(rowlengths);CHKERRQ(ierr);

    /* determine max buffer needed and allocate it */
    maxnz = 0;
    for (i=0; i<size; i++) {
      maxnz = PetscMax(maxnz,procsnz[i]);
    }
    ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr);

    /* read in my part of the matrix column indices */
    nz     = procsnz[0];
    ierr   = PetscMalloc1(nz,&ibuf);CHKERRQ(ierr);
    mycols = ibuf;
    if (size == 1) nz -= extra_rows;
    ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
    if (size == 1) {
      for (i=0; i<extra_rows; i++) mycols[nz+i] = M+i;
    }

    /* read in the column indices for every other processor (except the last) and ship them off */
    for (i=1; i<size-1; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
      ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
    }
    /* read in the stuff for the last proc */
    if (size != 1) {
      nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
      ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
      for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
      ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(cols);CHKERRQ(ierr);
  } else {  /* procs[i], i>0 */
    /* determine buffer space needed for message */
    nz = 0;
    for (i=0; i<m; i++) nz += locrowlens[i];
    ierr   = PetscMalloc1(nz,&ibuf);CHKERRQ(ierr);
    mycols = ibuf;
    /* receive message of column indices */
    ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
  }

  /* loop over local rows, determining number of off diagonal entries */
  ierr = PetscMalloc2(rend-rstart,&dlens,rend-rstart,&odlens);CHKERRQ(ierr);
  ierr = PetscMalloc3(Mbs,&mask,Mbs,&masked1,Mbs,&masked2);CHKERRQ(ierr);
  ierr = PetscMemzero(mask,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMemzero(masked1,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMemzero(masked2,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
  rowcount = 0;
  nzcount  = 0;
  for (i=0; i<mbs; i++) {
    dcount  = 0;
    odcount = 0;
    for (j=0; j<bs; j++) {
      kmax = locrowlens[rowcount];
      for (k=0; k<kmax; k++) {
        tmp = mycols[nzcount++]/bs; /* block col.
index */ 2649 if (!mask[tmp]) { 2650 mask[tmp] = 1; 2651 if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; /* entry in off-diag portion */ 2652 else masked1[dcount++] = tmp; /* entry in diag portion */ 2653 } 2654 } 2655 rowcount++; 2656 } 2657 2658 dlens[i] = dcount; /* d_nzz[i] */ 2659 odlens[i] = odcount; /* o_nzz[i] */ 2660 2661 /* zero out the mask elements we set */ 2662 for (j=0; j<dcount; j++) mask[masked1[j]] = 0; 2663 for (j=0; j<odcount; j++) mask[masked2[j]] = 0; 2664 } 2665 ierr = MatSetSizes(newmat,m,m,M+extra_rows,N+extra_rows);CHKERRQ(ierr); 2666 ierr = MatMPISBAIJSetPreallocation(newmat,bs,0,dlens,0,odlens);CHKERRQ(ierr); 2667 ierr = MatSetOption(newmat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE);CHKERRQ(ierr); 2668 2669 if (!rank) { 2670 ierr = PetscMalloc1(maxnz,&buf);CHKERRQ(ierr); 2671 /* read in my part of the matrix numerical values */ 2672 nz = procsnz[0]; 2673 vals = buf; 2674 mycols = ibuf; 2675 if (size == 1) nz -= extra_rows; 2676 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2677 if (size == 1) { 2678 for (i=0; i< extra_rows; i++) vals[nz+i] = 1.0; 2679 } 2680 2681 /* insert into matrix */ 2682 jj = rstart*bs; 2683 for (i=0; i<m; i++) { 2684 ierr = MatSetValues(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 2685 mycols += locrowlens[i]; 2686 vals += locrowlens[i]; 2687 jj++; 2688 } 2689 2690 /* read in other processors (except the last one) and ship out */ 2691 for (i=1; i<size-1; i++) { 2692 nz = procsnz[i]; 2693 vals = buf; 2694 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2695 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr); 2696 } 2697 /* the last proc */ 2698 if (size != 1) { 2699 nz = procsnz[i] - extra_rows; 2700 vals = buf; 2701 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2702 for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0; 2703 ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr); 2704 } 2705 ierr = PetscFree(procsnz);CHKERRQ(ierr); 2706 2707 } else { 2708 /* receive numeric values */ 2709 ierr = PetscMalloc1(nz,&buf);CHKERRQ(ierr); 2710 2711 /* receive message of values*/ 2712 vals = buf; 2713 mycols = ibuf; 2714 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm,&status);CHKERRQ(ierr); 2715 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 2716 if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2717 2718 /* insert into matrix */ 2719 jj = rstart*bs; 2720 for (i=0; i<m; i++) { 2721 ierr = MatSetValues_MPISBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 2722 mycols += locrowlens[i]; 2723 vals += locrowlens[i]; 2724 jj++; 2725 } 2726 } 2727 2728 ierr = PetscFree(locrowlens);CHKERRQ(ierr); 2729 ierr = PetscFree(buf);CHKERRQ(ierr); 2730 ierr = PetscFree(ibuf);CHKERRQ(ierr); 2731 ierr = PetscFree2(rowners,browners);CHKERRQ(ierr); 2732 ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr); 2733 ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr); 2734 ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2735 ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2736 PetscFunctionReturn(0); 2737 } 2738 2739 #undef __FUNCT__ 2740 #define __FUNCT__ "MatMPISBAIJSetHashTableFactor" 2741 /*XXXXX@ 2742 MatMPISBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable. 2743 2744 Input Parameters: 2745 . mat - the matrix 2746 . 
fact - factor 2747 2748 Not Collective on Mat, each process can have a different hash factor 2749 2750 Level: advanced 2751 2752 Notes: 2753 This can also be set by the command line option: -mat_use_hash_table fact 2754 2755 .keywords: matrix, hashtable, factor, HT 2756 2757 .seealso: MatSetOption() 2758 @XXXXX*/ 2759 2760 2761 #undef __FUNCT__ 2762 #define __FUNCT__ "MatGetRowMaxAbs_MPISBAIJ" 2763 PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat A,Vec v,PetscInt idx[]) 2764 { 2765 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 2766 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(a->B)->data; 2767 PetscReal atmp; 2768 PetscReal *work,*svalues,*rvalues; 2769 PetscErrorCode ierr; 2770 PetscInt i,bs,mbs,*bi,*bj,brow,j,ncols,krow,kcol,col,row,Mbs,bcol; 2771 PetscMPIInt rank,size; 2772 PetscInt *rowners_bs,dest,count,source; 2773 PetscScalar *va; 2774 MatScalar *ba; 2775 MPI_Status stat; 2776 2777 PetscFunctionBegin; 2778 if (idx) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Send email to petsc-maint@mcs.anl.gov"); 2779 ierr = MatGetRowMaxAbs(a->A,v,NULL);CHKERRQ(ierr); 2780 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2781 2782 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr); 2783 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr); 2784 2785 bs = A->rmap->bs; 2786 mbs = a->mbs; 2787 Mbs = a->Mbs; 2788 ba = b->a; 2789 bi = b->i; 2790 bj = b->j; 2791 2792 /* find ownerships */ 2793 rowners_bs = A->rmap->range; 2794 2795 /* each proc creates an array to be distributed */ 2796 ierr = PetscMalloc1(bs*Mbs,&work);CHKERRQ(ierr); 2797 ierr = PetscMemzero(work,bs*Mbs*sizeof(PetscReal));CHKERRQ(ierr); 2798 2799 /* row_max for B */ 2800 if (rank != size-1) { 2801 for (i=0; i<mbs; i++) { 2802 ncols = bi[1] - bi[0]; bi++; 2803 brow = bs*i; 2804 for (j=0; j<ncols; j++) { 2805 bcol = bs*(*bj); 2806 for (kcol=0; kcol<bs; kcol++) { 2807 col = bcol + kcol; /* local col index */ 2808 col += rowners_bs[rank+1]; /* global col index */ 2809 for (krow=0; krow<bs; krow++) { 2810 atmp = PetscAbsScalar(*ba); ba++; 2811 row = brow + krow; /* local row index */ 2812 if (PetscRealPart(va[row]) < atmp) va[row] = atmp; 2813 if (work[col] < atmp) work[col] = atmp; 2814 } 2815 } 2816 bj++; 2817 } 2818 } 2819 2820 /* send values to its owners */ 2821 for (dest=rank+1; dest<size; dest++) { 2822 svalues = work + rowners_bs[dest]; 2823 count = rowners_bs[dest+1]-rowners_bs[dest]; 2824 ierr = MPI_Send(svalues,count,MPIU_REAL,dest,rank,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2825 } 2826 } 2827 2828 /* receive values */ 2829 if (rank) { 2830 rvalues = work; 2831 count = rowners_bs[rank+1]-rowners_bs[rank]; 2832 for (source=0; source<rank; source++) { 2833 ierr = MPI_Recv(rvalues,count,MPIU_REAL,MPI_ANY_SOURCE,MPI_ANY_TAG,PetscObjectComm((PetscObject)A),&stat);CHKERRQ(ierr); 2834 /* process values */ 2835 for (i=0; i<count; i++) { 2836 if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i]; 2837 } 2838 } 2839 } 2840 2841 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2842 ierr = PetscFree(work);CHKERRQ(ierr); 2843 PetscFunctionReturn(0); 2844 } 2845 2846 #undef __FUNCT__ 2847 #define __FUNCT__ "MatSOR_MPISBAIJ" 2848 PetscErrorCode MatSOR_MPISBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 2849 { 2850 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data; 2851 PetscErrorCode ierr; 2852 PetscInt mbs=mat->mbs,bs=matin->rmap->bs; 2853 PetscScalar *x,*ptr,*from; 2854 Vec bb1; 2855 const PetscScalar *b; 2856 2857 PetscFunctionBegin; 2858 if (its <= 0 || lits <= 0) 
SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 2859 if (bs > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented"); 2860 2861 if (flag == SOR_APPLY_UPPER) { 2862 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2863 PetscFunctionReturn(0); 2864 } 2865 2866 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 2867 if (flag & SOR_ZERO_INITIAL_GUESS) { 2868 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr); 2869 its--; 2870 } 2871 2872 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 2873 while (its--) { 2874 2875 /* lower triangular part: slvec0b = - B^T*xx */ 2876 ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr); 2877 2878 /* copy xx into slvec0a */ 2879 ierr = VecGetArray(mat->slvec0,&ptr);CHKERRQ(ierr); 2880 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 2881 ierr = PetscMemcpy(ptr,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 2882 ierr = VecRestoreArray(mat->slvec0,&ptr);CHKERRQ(ierr); 2883 2884 ierr = VecScale(mat->slvec0,-1.0);CHKERRQ(ierr); 2885 2886 /* copy bb into slvec1a */ 2887 ierr = VecGetArray(mat->slvec1,&ptr);CHKERRQ(ierr); 2888 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 2889 ierr = PetscMemcpy(ptr,b,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 2890 ierr = VecRestoreArray(mat->slvec1,&ptr);CHKERRQ(ierr); 2891 2892 /* set slvec1b = 0 */ 2893 ierr = VecSet(mat->slvec1b,0.0);CHKERRQ(ierr); 2894 2895 ierr = VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2896 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 2897 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 2898 ierr = VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2899 2900 /* upper triangular part: bb1 = bb1 - B*x */ 2901 ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,bb1);CHKERRQ(ierr); 2902 2903 /* local diagonal sweep */ 2904 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr); 2905 } 2906 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 2907 } else if ((flag & SOR_LOCAL_FORWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) { 2908 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2909 } else if ((flag & SOR_LOCAL_BACKWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) { 2910 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2911 } else if (flag & SOR_EISENSTAT) { 2912 Vec xx1; 2913 PetscBool hasop; 2914 const PetscScalar *diag; 2915 PetscScalar *sl,scale = (omega - 2.0)/omega; 2916 PetscInt i,n; 2917 2918 if (!mat->xx1) { 2919 ierr = VecDuplicate(bb,&mat->xx1);CHKERRQ(ierr); 2920 ierr = VecDuplicate(bb,&mat->bb1);CHKERRQ(ierr); 2921 } 2922 xx1 = mat->xx1; 2923 bb1 = mat->bb1; 2924 2925 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr); 2926 2927 if (!mat->diag) { 2928 /* this is wrong for same matrix with new nonzero values */ 2929 ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr); 2930 ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr); 2931 } 2932 ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr); 2933 2934 if (hasop) { 2935 ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr); 2936 ierr = VecAYPX(mat->slvec1a,scale,bb);CHKERRQ(ierr); 2937 } else { 2938 /* 2939 These 
two lines are replaced by code that may be a bit faster for a good compiler 2940 ierr = VecPointwiseMult(mat->slvec1a,mat->diag,xx);CHKERRQ(ierr); 2941 ierr = VecAYPX(mat->slvec1a,scale,bb);CHKERRQ(ierr); 2942 */ 2943 ierr = VecGetArray(mat->slvec1a,&sl);CHKERRQ(ierr); 2944 ierr = VecGetArrayRead(mat->diag,&diag);CHKERRQ(ierr); 2945 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 2946 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 2947 ierr = VecGetLocalSize(xx,&n);CHKERRQ(ierr); 2948 if (omega == 1.0) { 2949 for (i=0; i<n; i++) sl[i] = b[i] - diag[i]*x[i]; 2950 ierr = PetscLogFlops(2.0*n);CHKERRQ(ierr); 2951 } else { 2952 for (i=0; i<n; i++) sl[i] = b[i] + scale*diag[i]*x[i]; 2953 ierr = PetscLogFlops(3.0*n);CHKERRQ(ierr); 2954 } 2955 ierr = VecRestoreArray(mat->slvec1a,&sl);CHKERRQ(ierr); 2956 ierr = VecRestoreArrayRead(mat->diag,&diag);CHKERRQ(ierr); 2957 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 2958 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 2959 } 2960 2961 /* multiply off-diagonal portion of matrix */ 2962 ierr = VecSet(mat->slvec1b,0.0);CHKERRQ(ierr); 2963 ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr); 2964 ierr = VecGetArray(mat->slvec0,&from);CHKERRQ(ierr); 2965 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 2966 ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 2967 ierr = VecRestoreArray(mat->slvec0,&from);CHKERRQ(ierr); 2968 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 2969 ierr = VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2970 ierr = VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2971 ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,mat->slvec1a);CHKERRQ(ierr); 2972 2973 /* local sweep */ 2974 ierr = (*mat->A->ops->sor)(mat->A,mat->slvec1a,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr); 2975 ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr); 2976 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format"); 2977 PetscFunctionReturn(0); 2978 } 2979 2980 #undef __FUNCT__ 2981 #define __FUNCT__ "MatSOR_MPISBAIJ_2comm" 2982 PetscErrorCode MatSOR_MPISBAIJ_2comm(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 2983 { 2984 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data; 2985 PetscErrorCode ierr; 2986 Vec lvec1,bb1; 2987 2988 PetscFunctionBegin; 2989 if (its <= 0 || lits <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 2990 if (matin->rmap->bs > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented"); 2991 2992 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 2993 if (flag & SOR_ZERO_INITIAL_GUESS) { 2994 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr); 2995 its--; 2996 } 2997 2998 ierr = VecDuplicate(mat->lvec,&lvec1);CHKERRQ(ierr); 2999 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 3000 while (its--) { 3001 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3002 3003 /* lower diagonal part: bb1 = bb - B^T*xx */ 3004 ierr = (*mat->B->ops->multtranspose)(mat->B,xx,lvec1);CHKERRQ(ierr); 3005 ierr = VecScale(lvec1,-1.0);CHKERRQ(ierr); 3006 3007 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3008 ierr = VecCopy(bb,bb1);CHKERRQ(ierr); 3009 ierr = 
VecScatterBegin(mat->Mvctx,lvec1,bb1,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3010 3011 /* upper diagonal part: bb1 = bb1 - B*x */ 3012 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 3013 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr); 3014 3015 ierr = VecScatterEnd(mat->Mvctx,lvec1,bb1,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3016 3017 /* diagonal sweep */ 3018 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr); 3019 } 3020 ierr = VecDestroy(&lvec1);CHKERRQ(ierr); 3021 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 3022 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format"); 3023 PetscFunctionReturn(0); 3024 } 3025 3026 #undef __FUNCT__ 3027 #define __FUNCT__ "MatCreateMPISBAIJWithArrays" 3028 /*@ 3029 MatCreateMPISBAIJWithArrays - creates a MPI SBAIJ matrix using arrays that contain in standard 3030 CSR format the local rows. 3031 3032 Collective on MPI_Comm 3033 3034 Input Parameters: 3035 + comm - MPI communicator 3036 . bs - the block size, only a block size of 1 is supported 3037 . m - number of local rows (Cannot be PETSC_DECIDE) 3038 . n - This value should be the same as the local size used in creating the 3039 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 3040 calculated if N is given) For square matrices n is almost always m. 3041 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 3042 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 3043 . i - row indices 3044 . j - column indices 3045 - a - matrix values 3046 3047 Output Parameter: 3048 . mat - the matrix 3049 3050 Level: intermediate 3051 3052 Notes: 3053 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 3054 thus you CANNOT change the matrix entries by changing the values of a[] after you have 3055 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3056 3057 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 3058 3059 .keywords: matrix, aij, compressed row, sparse, parallel 3060 3061 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 3062 MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays() 3063 @*/ 3064 PetscErrorCode MatCreateMPISBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 3065 { 3066 PetscErrorCode ierr; 3067 3068 3069 PetscFunctionBegin; 3070 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 3071 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 3072 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3073 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 3074 ierr = MatSetType(*mat,MATMPISBAIJ);CHKERRQ(ierr); 3075 ierr = MatMPISBAIJSetPreallocationCSR(*mat,bs,i,j,a);CHKERRQ(ierr); 3076 PetscFunctionReturn(0); 3077 } 3078 3079 3080 #undef __FUNCT__ 3081 #define __FUNCT__ "MatMPISBAIJSetPreallocationCSR" 3082 /*@C 3083 MatMPISBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in BAIJ format 3084 (the default parallel PETSc format). 3085 3086 Collective on MPI_Comm 3087 3088 Input Parameters: 3089 + B - the matrix 3090 . bs - the block size 3091 . 

#undef __FUNCT__
#define __FUNCT__ "MatMPISBAIJSetPreallocationCSR"
/*@C
   MatMPISBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in SBAIJ format
   (the symmetric block compressed row format, in which only the upper triangular portion is stored).

   Collective on MPI_Comm

   Input Parameters:
+  B - the matrix
.  bs - the block size
.  i - the indices into j for the start of each local row (starts with zero)
.  j - the column indices for each local row (starts with zero); these must be sorted for each row
-  v - optional values in the matrix

   Level: developer

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ
@*/
PetscErrorCode MatMPISBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscTryMethod(B,"MatMPISBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
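
/*
   Usage sketch (not part of the original source): sets CSR preallocation on an
   MPISBAIJ matrix created by hand. The sizes and index arrays are illustrative
   assumptions mirroring the example after MatCreateMPISBAIJWithArrays(), and
   the block is guarded out with #if 0.
*/
#if 0
static PetscErrorCode ExamplePreallocateCSR(MPI_Comm comm,Mat *B)
{
  PetscErrorCode ierr;
  PetscInt       i[3] = {0,2,3}; /* row offsets for 2 local rows             */
  PetscInt       j[3] = {0,1,1}; /* sorted upper-triangular column indices   */

  PetscFunctionBegin;
  ierr = MatCreate(comm,B);CHKERRQ(ierr);
  ierr = MatSetSizes(*B,2,2,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
  ierr = MatSetType(*B,MATMPISBAIJ);CHKERRQ(ierr);
  /* v = NULL: only the nonzero pattern is preallocated here */
  ierr = MatMPISBAIJSetPreallocationCSR(*B,1,i,j,NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif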

#undef __FUNCT__
#define __FUNCT__ "MatCreateMPIMatConcatenateSeqMat_MPISBAIJ"
PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPISBAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
{
  PetscErrorCode ierr;
  PetscInt       m,N,i,rstart,nnz,Ii,bs,cbs;
  PetscInt       *indx;
  PetscScalar    *values;

  PetscFunctionBegin;
  ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
  if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
    Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)inmat->data;
    PetscInt     *dnz,*onz,sum,bs,cbs,mbs,Nbs;
    PetscInt     *bindx,rmax=a->rmax,j;

    ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
    mbs  = m/bs; Nbs = N/cbs;
    if (n == PETSC_DECIDE) {
      ierr = PetscSplitOwnership(comm,&n,&Nbs);CHKERRQ(ierr);
    }
    /* Check sum(n) = Nbs */
    ierr = MPI_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
    if (sum != Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local block columns %D != global block columns %D",sum,Nbs);

    ierr    = MPI_Scan(&mbs,&rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
    rstart -= mbs;

    ierr = PetscMalloc1(rmax,&bindx);CHKERRQ(ierr);
    ierr = MatPreallocateInitialize(comm,mbs,n,dnz,onz);CHKERRQ(ierr);
    ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);
    for (i=0; i<mbs; i++) {
      ierr = MatGetRow_SeqSBAIJ(inmat,i*bs,&nnz,&indx,NULL);CHKERRQ(ierr); /* non-blocked nnz and indx */
      nnz  = nnz/bs;
      for (j=0; j<nnz; j++) bindx[j] = indx[j*bs]/bs;
      ierr = MatPreallocateSet(i+rstart,nnz,bindx,dnz,onz);CHKERRQ(ierr);
      ierr = MatRestoreRow_SeqSBAIJ(inmat,i*bs,&nnz,&indx,NULL);CHKERRQ(ierr);
    }
    ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_FALSE);CHKERRQ(ierr);
    ierr = PetscFree(bindx);CHKERRQ(ierr);

    ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
    ierr = MatSetSizes(*outmat,m,n*bs,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
    ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
    ierr = MatSetType(*outmat,MATMPISBAIJ);CHKERRQ(ierr);
    ierr = MatMPISBAIJSetPreallocation(*outmat,bs,0,dnz,0,onz);CHKERRQ(ierr);
    ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
  }

  /* numeric phase */
  ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr);

  ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);
  for (i=0; i<m; i++) {
    ierr = MatGetRow_SeqSBAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
    Ii   = i + rstart;
    ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
    ierr = MatRestoreRow_SeqSBAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
  }
  ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_FALSE);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
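
/*
   Usage sketch (not part of the original source): the routine above is the
   MPISBAIJ implementation behind the dispatcher MatCreateMPIMatConcatenateSeqMat(),
   which stacks one sequential matrix per rank into a single parallel matrix.
   The SeqSBAIJ input matrix inmat is assumed to exist already; the block is
   guarded out with #if 0.
*/
#if 0
static PetscErrorCode ExampleConcatenate(MPI_Comm comm,Mat inmat,Mat *C)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* PETSC_DECIDE lets the local column ownership be split automatically */
  ierr = MatCreateMPIMatConcatenateSeqMat(comm,inmat,PETSC_DECIDE,MAT_INITIAL_MATRIX,C);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif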