#include <../src/mat/impls/baij/mpi/mpibaij.h> /*I "petscmat.h" I*/
#include <../src/mat/impls/sbaij/mpi/mpisbaij.h>
#include <../src/mat/impls/sbaij/seq/sbaij.h>
#include <petscblaslapack.h>

#if defined(PETSC_HAVE_ELEMENTAL)
PETSC_EXTERN PetscErrorCode MatConvert_MPISBAIJ_Elemental(Mat,MatType,MatReuse,Mat*);
#endif
#undef __FUNCT__
#define __FUNCT__ "MatStoreValues_MPISBAIJ"
PetscErrorCode MatStoreValues_MPISBAIJ(Mat mat)
{
  Mat_MPISBAIJ   *aij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
  ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRetrieveValues_MPISBAIJ"
PetscErrorCode MatRetrieveValues_MPISBAIJ(Mat mat)
{
  Mat_MPISBAIJ   *aij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
  ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#define MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv,orow,ocol) \
  { \
    brow = row/bs;  \
    rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
    rmax = aimax[brow]; nrow = ailen[brow]; \
    bcol = col/bs; \
    ridx = row % bs; cidx = col % bs; \
    low  = 0; high = nrow; \
    while (high-low > 3) { \
      t = (low+high)/2; \
      if (rp[t] > bcol) high = t; \
      else              low  = t; \
    } \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap = ap + bs2*_i + bs*cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value;  \
        else                    *bap  = value; \
        goto a_noinsert; \
      } \
    } \
    if (a->nonew == 1) goto a_noinsert; \
    if (a->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \
    N = nrow++ - 1;  \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp[ii+1] = rp[ii]; \
      ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
    } \
    if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); } \
    rp[_i]                      = bcol;  \
    ap[bs2*_i + bs*cidx + ridx] = value;  \
    A->nonzerostate++;\
a_noinsert:; \
    ailen[brow] = nrow; \
  }
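/*
   The macro above and its twin below insert one scalar into the block-CSR
   storage of the local diagonal part (A) and the off-diagonal part (B):
   a short binary search narrows the window of block columns in the row, a
   linear scan finds (or creates) the bs x bs block, and the scalar lands at
   offset bs*cidx + ridx inside that column-major block.  As a sketch
   (hypothetical helper code, not part of this file), hitting an existing
   block reduces to

       MatScalar *block = ap + bs2*_i;    // start of the bs x bs block
       block[bs*cidx + ridx] = value;     // column-major addressing

   with bs2 == bs*bs and _i the position of the block within the row.
*/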
#define MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv,orow,ocol) \
  { \
    brow = row/bs;  \
    rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
    rmax = bimax[brow]; nrow = bilen[brow]; \
    bcol = col/bs; \
    ridx = row % bs; cidx = col % bs; \
    low  = 0; high = nrow; \
    while (high-low > 3) { \
      t = (low+high)/2; \
      if (rp[t] > bcol) high = t; \
      else              low  = t; \
    } \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap = ap + bs2*_i + bs*cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value;  \
        else                    *bap  = value;  \
        goto b_noinsert; \
      } \
    } \
    if (b->nonew == 1) goto b_noinsert; \
    if (b->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \
    N = nrow++ - 1;  \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp[ii+1] = rp[ii]; \
      ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
    } \
    if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);} \
    rp[_i]                      = bcol;  \
    ap[bs2*_i + bs*cidx + ridx] = value;  \
    B->nonzerostate++;\
b_noinsert:; \
    bilen[brow] = nrow; \
  }
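/*
   Caller-side sketch (hypothetical user code, standard PETSc API): for
   MATMPISBAIJ only the upper triangle needs to be supplied; the fate of
   lower-triangular input is decided in MatSetValues_MPISBAIJ() below.

       Mat         S;
       PetscInt    i = 0, j = 1;
       PetscScalar v = -1.0;
       MatCreate(PETSC_COMM_WORLD,&S);
       MatSetSizes(S,PETSC_DECIDE,PETSC_DECIDE,10,10);
       MatSetType(S,MATMPISBAIJ);
       MatSetUp(S);
       MatSetValues(S,1,&i,1,&j,&v,INSERT_VALUES);  // upper triangle: stored
       MatSetOption(S,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE);
       MatSetValues(S,1,&j,1,&i,&v,INSERT_VALUES);  // lower triangle: dropped
       MatAssemblyBegin(S,MAT_FINAL_ASSEMBLY);
       MatAssemblyEnd(S,MAT_FINAL_ASSEMBLY);
*/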
/* Only add/insert a(i,j) with i<=j (blocks).
   Any a(i,j) with i>j input by user is ignored.
*/
#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPISBAIJ"
PetscErrorCode MatSetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  MatScalar      value;
  PetscBool      roworiented = baij->roworiented;
  PetscErrorCode ierr;
  PetscInt       i,j,row,col;
  PetscInt       rstart_orig=mat->rmap->rstart;
  PetscInt       rend_orig  =mat->rmap->rend,cstart_orig=mat->cmap->rstart;
  PetscInt       cend_orig  =mat->cmap->rend,bs=mat->rmap->bs;

  /* Some Variables required in the macro */
  Mat          A     = baij->A;
  Mat_SeqSBAIJ *a    = (Mat_SeqSBAIJ*)(A)->data;
  PetscInt     *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
  MatScalar    *aa   =a->a;

  Mat         B     = baij->B;
  Mat_SeqBAIJ *b    = (Mat_SeqBAIJ*)(B)->data;
  PetscInt    *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
  MatScalar   *ba   =b->a;

  PetscInt  *rp,ii,nrow,_i,rmax,N,brow,bcol;
  PetscInt  low,high,t,ridx,cidx,bs2=a->bs2;
  MatScalar *ap,*bap;

  /* for stash */
  PetscInt  n_loc, *in_loc = NULL;
  MatScalar *v_loc = NULL;

  PetscFunctionBegin;
  if (!baij->donotstash) {
    if (n > baij->n_loc) {
      ierr = PetscFree(baij->in_loc);CHKERRQ(ierr);
      ierr = PetscFree(baij->v_loc);CHKERRQ(ierr);
      ierr = PetscMalloc1(n,&baij->in_loc);CHKERRQ(ierr);
      ierr = PetscMalloc1(n,&baij->v_loc);CHKERRQ(ierr);

      baij->n_loc = n;
    }
    in_loc = baij->in_loc;
    v_loc  = baij->v_loc;
  }

  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
    if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */
      row = im[i] - rstart_orig;                     /* local row index */
      for (j=0; j<n; j++) {
        if (im[i]/bs > in[j]/bs) {
          if (a->ignore_ltriangular) {
            continue; /* ignore lower triangular blocks */
          } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
        }
        if (in[j] >= cstart_orig && in[j] < cend_orig) { /* diag entry (A) */
          col  = in[j] - cstart_orig;                    /* local col index */
          brow = row/bs; bcol = col/bs;
          if (brow > bcol) continue; /* ignore lower triangular blocks of A */
          if (roworiented) value = v[i*n+j];
          else             value = v[i+j*m];
          MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv,im[i],in[j]);
          /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
#endif
        else { /* off-diag entry (B) */
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined(PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
            col  = col - 1;
#else
            col = baij->colmap[in[j]/bs] - 1;
#endif
            if (col < 0 && !((Mat_SeqSBAIJ*)(baij->A->data))->nonew) {
              ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
              B    = baij->B;
              b    = (Mat_SeqBAIJ*)(B)->data;
              bimax= b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
              ba   = b->a;
            } else col += in[j]%bs;
          } else col = in[j];
          if (roworiented) value = v[i*n+j];
          else             value = v[i+j*m];
          MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv,im[i],in[j]);
          /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        }
      }
    } else { /* off processor entry */
      if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!baij->donotstash) {
        mat->assembled = PETSC_FALSE;
        n_loc          = 0;
        for (j=0; j<n; j++) {
          if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */
          in_loc[n_loc] = in[j];
          if (roworiented) {
            v_loc[n_loc] = v[i*n+j];
          } else {
            v_loc[n_loc] = v[j*m+i];
          }
          n_loc++;
        }
        ierr = MatStashValuesRow_Private(&mat->stash,im[i],n_loc,in_loc,v_loc,PETSC_FALSE);CHKERRQ(ierr);
      }
    }
  }
  PetscFunctionReturn(0);
}
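/*
   Off-process rows accepted by the routine above are not sent immediately:
   they accumulate in mat->stash and travel to their owners inside
   MatAssemblyBegin()/MatAssemblyEnd().  Minimal pattern a caller must
   follow (offRow/col/val are placeholders):

       MatSetValues(mat,1,&offRow,1,&col,&val,ADD_VALUES); // stashed
       MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);           // ships stashes
       MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);             // applies them
*/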
#undef __FUNCT__
#define __FUNCT__ "MatSetValuesBlocked_SeqSBAIJ_Inlined"
PETSC_STATIC_INLINE PetscErrorCode MatSetValuesBlocked_SeqSBAIJ_Inlined(Mat A,PetscInt row,PetscInt col,const PetscScalar v[],InsertMode is,PetscInt orow,PetscInt ocol)
{
  Mat_SeqSBAIJ      *a = (Mat_SeqSBAIJ*)A->data;
  PetscErrorCode    ierr;
  PetscInt          *rp,low,high,t,ii,jj,nrow,i,rmax,N;
  PetscInt          *imax      =a->imax,*ai=a->i,*ailen=a->ilen;
  PetscInt          *aj        =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs;
  PetscBool         roworiented=a->roworiented;
  const PetscScalar *value     = v;
  MatScalar         *ap,*aa    = a->a,*bap;

  PetscFunctionBegin;
  if (col < row) {
    if (a->ignore_ltriangular) PetscFunctionReturn(0); /* ignore lower triangular block */
    else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
  }
  rp    = aj + ai[row];
  ap    = aa + bs2*ai[row];
  rmax  = imax[row];
  nrow  = ailen[row];
  value = v;
  low   = 0;
  high  = nrow;

  while (high-low > 7) {
    t = (low+high)/2;
    if (rp[t] > col) high = t;
    else             low  = t;
  }
  for (i=low; i<high; i++) {
    if (rp[i] > col) break;
    if (rp[i] == col) {
      bap = ap + bs2*i;
      if (roworiented) {
        if (is == ADD_VALUES) {
          for (ii=0; ii<bs; ii++) {
            for (jj=ii; jj<bs2; jj+=bs) {
              bap[jj] += *value++;
            }
          }
        } else {
          for (ii=0; ii<bs; ii++) {
            for (jj=ii; jj<bs2; jj+=bs) {
              bap[jj] = *value++;
            }
          }
        }
      } else {
        if (is == ADD_VALUES) {
          for (ii=0; ii<bs; ii++) {
            for (jj=0; jj<bs; jj++) {
              *bap++ += *value++;
            }
          }
        } else {
          for (ii=0; ii<bs; ii++) {
            for (jj=0; jj<bs; jj++) {
              *bap++ = *value++;
            }
          }
        }
      }
      goto noinsert2;
    }
  }
  if (nonew == 1) goto noinsert2;
  if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new block index nonzero block (%D, %D) in the matrix", orow, ocol);
  MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
  N = nrow++ - 1; high++;
  /* shift up all the later entries in this row */
  for (ii=N; ii>=i; ii--) {
    rp[ii+1] = rp[ii];
    ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
  }
  if (N >= i) {
    ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
  }
  rp[i] = col;
  bap   = ap + bs2*i;
  if (roworiented) {
    for (ii=0; ii<bs; ii++) {
      for (jj=ii; jj<bs2; jj+=bs) {
        bap[jj] = *value++;
      }
    }
  } else {
    for (ii=0; ii<bs; ii++) {
      for (jj=0; jj<bs; jj++) {
        *bap++ = *value++;
      }
    }
  }
noinsert2:;
  ailen[row] = nrow;
  PetscFunctionReturn(0);
}
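/*
   Note on the copy loops above: stored blocks are column-major, so for
   row-oriented input the (ii,jj) loop with stride bs transposes on the fly.
   For bs = 2 and row-oriented input {a,b,c,d} (the block [a b; c d]) the
   stores are

       bap[0] = a;  bap[2] = b;   // first input row
       bap[1] = c;  bap[3] = d;   // second input row

   i.e. exactly the column-major layout {a,c,b,d}.
*/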
#undef __FUNCT__
#define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ_Inlined"
/*
    This routine is exactly duplicated in mpibaij.c
*/
PETSC_STATIC_INLINE PetscErrorCode MatSetValuesBlocked_SeqBAIJ_Inlined(Mat A,PetscInt row,PetscInt col,const PetscScalar v[],InsertMode is,PetscInt orow,PetscInt ocol)
{
  Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
  PetscInt          *rp,low,high,t,ii,jj,nrow,i,rmax,N;
  PetscInt          *imax=a->imax,*ai=a->i,*ailen=a->ilen;
  PetscErrorCode    ierr;
  PetscInt          *aj        =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs;
  PetscBool         roworiented=a->roworiented;
  const PetscScalar *value     = v;
  MatScalar         *ap,*aa    = a->a,*bap;

  PetscFunctionBegin;
  rp    = aj + ai[row];
  ap    = aa + bs2*ai[row];
  rmax  = imax[row];
  nrow  = ailen[row];
  low   = 0;
  high  = nrow;
  value = v;
  while (high-low > 7) {
    t = (low+high)/2;
    if (rp[t] > col) high = t;
    else             low  = t;
  }
  for (i=low; i<high; i++) {
    if (rp[i] > col) break;
    if (rp[i] == col) {
      bap = ap + bs2*i;
      if (roworiented) {
        if (is == ADD_VALUES) {
          for (ii=0; ii<bs; ii++) {
            for (jj=ii; jj<bs2; jj+=bs) {
              bap[jj] += *value++;
            }
          }
        } else {
          for (ii=0; ii<bs; ii++) {
            for (jj=ii; jj<bs2; jj+=bs) {
              bap[jj] = *value++;
            }
          }
        }
      } else {
        if (is == ADD_VALUES) {
          for (ii=0; ii<bs; ii++,value+=bs) {
            for (jj=0; jj<bs; jj++) {
              bap[jj] += value[jj];
            }
            bap += bs;
          }
        } else {
          for (ii=0; ii<bs; ii++,value+=bs) {
            for (jj=0; jj<bs; jj++) {
              bap[jj] = value[jj];
            }
            bap += bs;
          }
        }
      }
      goto noinsert2;
    }
  }
  if (nonew == 1) goto noinsert2;
  if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new global block indexed nonzero block (%D, %D) in the matrix", orow, ocol);
  MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
  N = nrow++ - 1; high++;
  /* shift up all the later entries in this row */
  for (ii=N; ii>=i; ii--) {
    rp[ii+1] = rp[ii];
    ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
  }
  if (N >= i) {
    ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
  }
  rp[i] = col;
  bap   = ap + bs2*i;
  if (roworiented) {
    for (ii=0; ii<bs; ii++) {
      for (jj=ii; jj<bs2; jj+=bs) {
        bap[jj] = *value++;
      }
    }
  } else {
    for (ii=0; ii<bs; ii++) {
      for (jj=0; jj<bs; jj++) {
        *bap++ = *value++;
      }
    }
  }
noinsert2:;
  ailen[row] = nrow;
  PetscFunctionReturn(0);
}
#undef __FUNCT__
#define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ"
/*
    This routine could be optimized by removing the need for the block copy below and passing stride information
  to the above inline routines; similarly in MatSetValuesBlocked_MPIBAIJ()
*/
PetscErrorCode MatSetValuesBlocked_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
{
  Mat_MPISBAIJ    *baij = (Mat_MPISBAIJ*)mat->data;
  const MatScalar *value;
  MatScalar       *barray     = baij->barray;
  PetscBool       roworiented = baij->roworiented,ignore_ltriangular = ((Mat_SeqSBAIJ*)baij->A->data)->ignore_ltriangular;
  PetscErrorCode  ierr;
  PetscInt        i,j,ii,jj,row,col,rstart=baij->rstartbs;
  PetscInt        rend=baij->rendbs,cstart=baij->rstartbs,stepval;
  PetscInt        cend=baij->rendbs,bs=mat->rmap->bs,bs2=baij->bs2;

  PetscFunctionBegin;
  if (!barray) {
    ierr         = PetscMalloc1(bs2,&barray);CHKERRQ(ierr);
    baij->barray = barray;
  }

  if (roworiented) {
    stepval = (n-1)*bs;
  } else {
    stepval = (m-1)*bs;
  }
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block indexed row too large %D max %D",im[i],baij->Mbs-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row = im[i] - rstart;
      for (j=0; j<n; j++) {
        if (im[i] > in[j]) {
          if (ignore_ltriangular) continue; /* ignore lower triangular blocks */
          else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
        }
        /* If NumCol = 1 then a copy is not required */
        if ((roworiented) && (n == 1)) {
          barray = (MatScalar*) v + i*bs2;
        } else if ((!roworiented) && (m == 1)) {
          barray = (MatScalar*) v + j*bs2;
        } else { /* Here a copy is required */
          if (roworiented) {
            value = v + i*(stepval+bs)*bs + j*bs;
          } else {
            value = v + j*(stepval+bs)*bs + i*bs;
          }
          for (ii=0; ii<bs; ii++,value+=stepval) {
            for (jj=0; jj<bs; jj++) {
              *barray++ = *value++;
            }
          }
          barray -= bs2;
        }

        if (in[j] >= cstart && in[j] < cend) {
          col  = in[j] - cstart;
          ierr = MatSetValuesBlocked_SeqSBAIJ_Inlined(baij->A,row,col,barray,addv,im[i],in[j]);CHKERRQ(ierr);
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block indexed column too large %D max %D",in[j],baij->Nbs-1);
#endif
        else {
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }

#if defined(PETSC_USE_DEBUG)
#if defined(PETSC_USE_CTABLE)
            { PetscInt data;
              ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
              if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
            }
#else
            if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
#endif
#endif
#if defined(PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col  = (col - 1)/bs;
#else
            col = (baij->colmap[in[j]] - 1)/bs;
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
              ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
            }
          } else col = in[j];
          ierr = MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B,row,col,barray,addv,im[i],in[j]);CHKERRQ(ierr);
        }
      }
    } else {
      if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process block indexed row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}
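/*
   Caller-side sketch for the blocked path (hypothetical user code): with
   block size 2, one call deposits a full 2x2 block at block row ib, block
   column jb; indices are in units of blocks, not scalars.

       PetscInt    ib = 0, jb = 1;
       PetscScalar blk[4] = {1.0,2.0,3.0,4.0};  // row-oriented by default
       MatSetValuesBlocked(S,1,&ib,1,&jb,blk,INSERT_VALUES);

   assuming S is a MATMPISBAIJ with bs = 2, e.g. from
   MatMPISBAIJSetPreallocation(S,2,d_nz,NULL,o_nz,NULL).
*/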
#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPISBAIJ"
PetscErrorCode MatGetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       bs       = mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend;
  PetscInt       bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); */
    if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
    if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
      row = idxm[i] - bsrstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column %D",idxn[j]); */
        if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
        if (idxn[j] >= bscstart && idxn[j] < bscend) {
          col  = idxn[j] - bscstart;
          ierr = MatGetValues_SeqSBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!baij->colmap) {
            ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined(PETSC_USE_CTABLE)
          ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
          data--;
#else
          data = baij->colmap[idxn[j]/bs]-1;
#endif
          if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
          else {
            col  = data + idxn[j]%bs;
            ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
  }
  PetscFunctionReturn(0);
}
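/*
   The colmap lookups above translate a global block column g into the
   compacted column space of B (shifted by one so that 0 can mean "not
   present"):

       colmap[g] - 1   ==  scalar offset of block column g inside B,
                           or negative if g is absent on this process

   garray[] is the inverse map, listing the global block columns held in B.
*/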
#undef __FUNCT__
#define __FUNCT__ "MatNorm_MPISBAIJ"
PetscErrorCode MatNorm_MPISBAIJ(Mat mat,NormType type,PetscReal *norm)
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscReal      sum[2],*lnorm2;

  PetscFunctionBegin;
  if (baij->size == 1) {
    ierr = MatNorm(baij->A,type,norm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      ierr    = PetscMalloc1(2,&lnorm2);CHKERRQ(ierr);
      ierr    = MatNorm(baij->A,type,lnorm2);CHKERRQ(ierr);
      *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2++;            /* square of norm(A) */
      ierr    = MatNorm(baij->B,type,lnorm2);CHKERRQ(ierr);
      *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2--;            /* square of norm(B) */
      ierr    = MPI_Allreduce(lnorm2,sum,2,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      *norm   = PetscSqrtReal(sum[0] + 2*sum[1]);
      ierr    = PetscFree(lnorm2);CHKERRQ(ierr);
    } else if (type == NORM_INFINITY || type == NORM_1) { /* max row/column sum */
      Mat_SeqSBAIJ *amat=(Mat_SeqSBAIJ*)baij->A->data;
      Mat_SeqBAIJ  *bmat=(Mat_SeqBAIJ*)baij->B->data;
      PetscReal    *rsum,*rsum2,vabs;
      PetscInt     *jj,*garray=baij->garray,rstart=baij->rstartbs,nz;
      PetscInt     brow,bcol,col,bs=baij->A->rmap->bs,row,grow,gcol,mbs=amat->mbs;
      MatScalar    *v;

      ierr = PetscMalloc2(mat->cmap->N,&rsum,mat->cmap->N,&rsum2);CHKERRQ(ierr);
      ierr = PetscMemzero(rsum,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
      /* Amat */
      v = amat->a; jj = amat->j;
      for (brow=0; brow<mbs; brow++) {
        grow = bs*(rstart + brow);
        nz   = amat->i[brow+1] - amat->i[brow];
        for (bcol=0; bcol<nz; bcol++) {
          gcol = bs*(rstart + *jj); jj++;
          for (col=0; col<bs; col++) {
            for (row=0; row<bs; row++) {
              vabs            = PetscAbsScalar(*v); v++;
              rsum[gcol+col] += vabs;
              /* non-diagonal block */
              if (bcol > 0 && vabs > 0.0) rsum[grow+row] += vabs;
            }
          }
        }
      }
      /* Bmat */
      v = bmat->a; jj = bmat->j;
      for (brow=0; brow<mbs; brow++) {
        grow = bs*(rstart + brow);
        nz   = bmat->i[brow+1] - bmat->i[brow];
        for (bcol=0; bcol<nz; bcol++) {
          gcol = bs*garray[*jj]; jj++;
          for (col=0; col<bs; col++) {
            for (row=0; row<bs; row++) {
              vabs            = PetscAbsScalar(*v); v++;
              rsum[gcol+col] += vabs;
              rsum[grow+row] += vabs;
            }
          }
        }
      }
      ierr  = MPI_Allreduce(rsum,rsum2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      *norm = 0.0;
      for (col=0; col<mat->cmap->N; col++) {
        if (rsum2[col] > *norm) *norm = rsum2[col];
      }
      ierr = PetscFree2(rsum,rsum2);CHKERRQ(ierr);
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for this norm yet");
  }
  PetscFunctionReturn(0);
}
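/*
   Why sqrt(sum[0] + 2*sum[1]) above: each entry stored in the off-diagonal
   part B represents two entries of the symmetric global matrix, while the
   sequential SBAIJ norm of A is assumed to already account for the implicit
   lower triangle of the diagonal block.  Hence, per process,

       contribution to ||M||_F^2  =  ||A||_F^2 + 2*||B||_F^2

   and the MPI_Allreduce sums these contributions over all processes.
*/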
#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPISBAIJ"
PetscErrorCode MatAssemblyBegin_MPISBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;
  InsertMode     addv;

  PetscFunctionBegin;
  if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);

  /* make sure all processors are either in INSERTMODE or ADDMODE */
  ierr = MPI_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
  ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(mat,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPISBAIJ"
PetscErrorCode MatAssemblyEnd_MPISBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPISBAIJ   *baij=(Mat_MPISBAIJ*)mat->data;
  Mat_SeqSBAIJ   *a   =(Mat_SeqSBAIJ*)baij->A->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
  PetscInt       *row,*col;
  PetscBool      other_disassembled;
  PetscMPIInt    n;
  PetscBool      r1,r2,r3;
  MatScalar      *val;
  InsertMode     addv = mat->insertmode;

  /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
  PetscFunctionBegin;
  if (!baij->donotstash && !mat->nooffprocentries) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i    = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
    /* Now process the block-stash. Since the values are stashed column-oriented,
       set the roworiented flag to column oriented, and after MatSetValues()
       restore the original flags */
    r1 = baij->roworiented;
    r2 = a->roworiented;
    r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented;

    baij->roworiented = PETSC_FALSE;
    a->roworiented    = PETSC_FALSE;

    ((Mat_SeqBAIJ*)baij->B->data)->roworiented = PETSC_FALSE; /* b->roworiented */
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        ierr = MatSetValuesBlocked_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
        i    = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);

    baij->roworiented = r1;
    a->roworiented    = r2;

    ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworiented */
  }

  ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
    }
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPISBAIJ(mat);CHKERRQ(ierr); /* setup Mvctx and sMvctx */
  }
  ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);

  ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);

  baij->rowvalues = 0;

  /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
  if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
    PetscObjectState state = baij->A->nonzerostate + baij->B->nonzerostate;
    ierr = MPI_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

extern PetscErrorCode MatView_SeqSBAIJ_ASCII(Mat,PetscViewer);
extern PetscErrorCode MatSetValues_MPIBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
#include <petscdraw.h>
#undef __FUNCT__
#define __FUNCT__ "MatView_MPISBAIJ_ASCIIorDraworSocket"
static PetscErrorCode MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPISBAIJ      *baij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode    ierr;
  PetscInt          bs   = mat->rmap->bs;
  PetscMPIInt       rank = baij->rank;
  PetscBool         iascii,isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo info;
      ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,mat->rmap->bs,(PetscInt)info.memory);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
      ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(0);
    }
  }

  if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
    if (isnull) PetscFunctionReturn(0);
  }

  {
    /* assemble the entire matrix onto first processor. */
    Mat          A;
    Mat_SeqSBAIJ *Aloc;
    Mat_SeqBAIJ  *Bloc;
    PetscInt     M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
    MatScalar    *a;
    const char   *matname;

    /* Should this be the same type as mat? */
    ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
    } else {
      ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
    }
    ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
    ierr = MatMPISBAIJSetPreallocation(A,mat->rmap->bs,0,NULL,0,NULL);CHKERRQ(ierr);
    ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);

    /* copy over the A part */
    Aloc = (Mat_SeqSBAIJ*)baij->A->data;
    ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
    ierr = PetscMalloc1(bs,&rvals);CHKERRQ(ierr);

    for (i=0; i<mbs; i++) {
      rvals[0] = bs*(baij->rstartbs + i);
      for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
      for (j=ai[i]; j<ai[i+1]; j++) {
        col = (baij->cstartbs+aj[j])*bs;
        for (k=0; k<bs; k++) {
          ierr = MatSetValues_MPISBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++;
          a += bs;
        }
      }
    }
    /* copy over the B part */
    Bloc = (Mat_SeqBAIJ*)baij->B->data;
    ai   = Bloc->i; aj = Bloc->j; a = Bloc->a;
    for (i=0; i<mbs; i++) {
      rvals[0] = bs*(baij->rstartbs + i);
      for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
      for (j=ai[i]; j<ai[i+1]; j++) {
        col = baij->garray[aj[j]]*bs;
        for (k=0; k<bs; k++) {
          ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++;
          a += bs;
        }
      }
    }
    ierr = PetscFree(rvals);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
    ierr = PetscObjectGetName((PetscObject)mat,&matname);CHKERRQ(ierr);
    if (!rank) {
      ierr = PetscObjectSetName((PetscObject)((Mat_MPISBAIJ*)(A->data))->A,matname);CHKERRQ(ierr);
      ierr = MatView_SeqSBAIJ_ASCII(((Mat_MPISBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
    ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
    ierr = MatDestroy(&A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPISBAIJ"
PetscErrorCode MatView_MPISBAIJ(Mat mat,PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscBool      iascii,isdraw,issocket,isbinary;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
  if (iascii || isdraw || issocket || isbinary) {
    ierr = MatView_MPISBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPISBAIJ"
PetscErrorCode MatDestroy_MPISBAIJ(Mat mat)
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
  ierr = MatDestroy(&baij->A);CHKERRQ(ierr);
  ierr = MatDestroy(&baij->B);CHKERRQ(ierr);
#if defined(PETSC_USE_CTABLE)
  ierr = PetscTableDestroy(&baij->colmap);CHKERRQ(ierr);
#else
  ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
#endif
  ierr = PetscFree(baij->garray);CHKERRQ(ierr);
  ierr = VecDestroy(&baij->lvec);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&baij->Mvctx);CHKERRQ(ierr);
  ierr = VecDestroy(&baij->slvec0);CHKERRQ(ierr);
  ierr = VecDestroy(&baij->slvec0b);CHKERRQ(ierr);
  ierr = VecDestroy(&baij->slvec1);CHKERRQ(ierr);
  ierr = VecDestroy(&baij->slvec1a);CHKERRQ(ierr);
  ierr = VecDestroy(&baij->slvec1b);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&baij->sMvctx);CHKERRQ(ierr);
  ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
  ierr = PetscFree(baij->barray);CHKERRQ(ierr);
  ierr = PetscFree(baij->hd);CHKERRQ(ierr);
  ierr = VecDestroy(&baij->diag);CHKERRQ(ierr);
  ierr = VecDestroy(&baij->bb1);CHKERRQ(ierr);
  ierr = VecDestroy(&baij->xx1);CHKERRQ(ierr);
#if defined(PETSC_USE_REAL_MAT_SINGLE)
  ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr);
#endif
  ierr = PetscFree(baij->in_loc);CHKERRQ(ierr);
  ierr = PetscFree(baij->v_loc);CHKERRQ(ierr);
  ierr = PetscFree(baij->rangebs);CHKERRQ(ierr);
  ierr = PetscFree(mat->data);CHKERRQ(ierr);

  ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPISBAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpisbaij_mpisbstrm_C",NULL);CHKERRQ(ierr);
#if defined(PETSC_HAVE_ELEMENTAL)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpisbaij_elemental_C",NULL);CHKERRQ(ierr);
#endif
  PetscFunctionReturn(0);
}
#undef __FUNCT__
#define __FUNCT__ "MatMult_MPISBAIJ_Hermitian"
PetscErrorCode MatMult_MPISBAIJ_Hermitian(Mat A,Vec xx,Vec yy)
{
  Mat_MPISBAIJ      *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode    ierr;
  PetscInt          nt,mbs=a->mbs,bs=A->rmap->bs;
  PetscScalar       *from;
  const PetscScalar *x;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");

  /* diagonal part */
  ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr);
  ierr = VecSet(a->slvec1b,0.0);CHKERRQ(ierr);

  /* subdiagonal part */
  ierr = (*a->B->ops->multhermitiantranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);

  /* copy x into the vec slvec0 */
  ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
  ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);

  ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
  ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
  ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);

  ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  /* superdiagonal part */
  ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMult_MPISBAIJ"
PetscErrorCode MatMult_MPISBAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPISBAIJ      *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode    ierr;
  PetscInt          nt,mbs=a->mbs,bs=A->rmap->bs;
  PetscScalar       *from;
  const PetscScalar *x;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");

  /* diagonal part */
  ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr);
  ierr = VecSet(a->slvec1b,0.0);CHKERRQ(ierr);

  /* subdiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);

  /* copy x into the vec slvec0 */
  ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
  ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);

  ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
  ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
  ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);

  ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  /* superdiagonal part */
  ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMult_MPISBAIJ_2comm"
PetscErrorCode MatMult_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");

  ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
  if (nt != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and yy");

  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  /* do diagonal part */
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  /* do superdiagonal part */
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  /* do subdiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
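/*
   The single-scatter kernels above compute y = (A + B + B^T) x in one
   communication round: slvec0 packs the local x (first bs*mbs entries)
   together with B^T x (slvec0b); the forward scatter with ADD_VALUES both
   delivers ghost values of x into slvec1b and accumulates remote B^T x
   contributions into slvec1a, so the final multadd with B finishes

       y = A*x_local + B*x_ghost + (B^T x)_received

   The _2comm variants realize the same product with two scatters over the
   conventional Mvctx instead.
*/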
#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPISBAIJ"
PetscErrorCode MatMultAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPISBAIJ      *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode    ierr;
  PetscInt          mbs=a->mbs,bs=A->rmap->bs;
  PetscScalar       *from,zero=0.0;
  const PetscScalar *x;

  PetscFunctionBegin;
  /*
  PetscSynchronizedPrintf(PetscObjectComm((PetscObject)A)," MatMultAdd is called ...\n");
  PetscSynchronizedFlush(PetscObjectComm((PetscObject)A),PETSC_STDOUT);
  */
  /* diagonal part */
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,a->slvec1a);CHKERRQ(ierr);
  ierr = VecSet(a->slvec1b,zero);CHKERRQ(ierr);

  /* subdiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);

  /* copy x into the vec slvec0 */
  ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
  ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
  ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
  ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);

  ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

  /* superdiagonal part */
  ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPISBAIJ_2comm"
PetscErrorCode MatMultAdd_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  /* do diagonal part */
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* do superdiagonal part */
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);

  /* do subdiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonal_MPISBAIJ"
PetscErrorCode MatGetDiagonal_MPISBAIJ(Mat A,Vec v)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* if (a->rmap->N != a->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatScale_MPISBAIJ"
PetscErrorCode MatScale_MPISBAIJ(Mat A,PetscScalar aa)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatScale(a->A,aa);CHKERRQ(ierr);
  ierr = MatScale(a->B,aa);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#undef __FUNCT__
#define __FUNCT__ "MatGetRow_MPISBAIJ"
PetscErrorCode MatGetRow_MPISBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
  PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
  PetscErrorCode ierr;
  PetscInt       bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
  PetscInt       nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend;
  PetscInt       *cmap,*idx_p,cstart = mat->rstartbs;

  PetscFunctionBegin;
  if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqSBAIJ *Aa = (Mat_SeqSBAIJ*)mat->A->data;
    Mat_SeqBAIJ  *Ba = (Mat_SeqBAIJ*)mat->B->data;
    PetscInt     max = 1,mbs = mat->mbs,tmp;
    for (i=0; i<mbs; i++) {
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; /* row length */
      if (max < tmp) max = tmp;
    }
    ierr = PetscMalloc2(max*bs2,&mat->rowvalues,max*bs2,&mat->rowindices);CHKERRQ(ierr);
  }

  if (row < brstart || row >= brend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows");
  lrow = row - brstart;  /* local row index */

  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  nztot = nzA + nzB;

  cmap = mat->garray;
  if (v || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      PetscInt imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        for (i=0; i<nzB; i++) {
          if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
        for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          for (i=0; i<imark; i++) {
            idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
          }
        } else {
          for (i=0; i<nzB; i++) {
            if (cmap[cworkB[i]/bs] < cstart) idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
            else break;
          }
          imark = i;
        }
        for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
        for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
      }
    } else {
      if (idx) *idx = 0;
      if (v)   *v   = 0;
    }
  }
  *nz  = nztot;
  ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRestoreRow_MPISBAIJ"
PetscErrorCode MatRestoreRow_MPISBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;

  PetscFunctionBegin;
  if (!baij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
  baij->getrowactive = PETSC_FALSE;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetRowUpperTriangular_MPISBAIJ"
PetscErrorCode MatGetRowUpperTriangular_MPISBAIJ(Mat A)
{
  Mat_MPISBAIJ *a  = (Mat_MPISBAIJ*)A->data;
  Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data;

  PetscFunctionBegin;
  aA->getrow_utriangular = PETSC_TRUE;
  PetscFunctionReturn(0);
}
#undef __FUNCT__
#define __FUNCT__ "MatRestoreRowUpperTriangular_MPISBAIJ"
PetscErrorCode MatRestoreRowUpperTriangular_MPISBAIJ(Mat A)
{
  Mat_MPISBAIJ *a  = (Mat_MPISBAIJ*)A->data;
  Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data;

  PetscFunctionBegin;
  aA->getrow_utriangular = PETSC_FALSE;
  PetscFunctionReturn(0);
}
#undef __FUNCT__
#define __FUNCT__ "MatRealPart_MPISBAIJ"
PetscErrorCode MatRealPart_MPISBAIJ(Mat A)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatRealPart(a->A);CHKERRQ(ierr);
  ierr = MatRealPart(a->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatImaginaryPart_MPISBAIJ"
PetscErrorCode MatImaginaryPart_MPISBAIJ(Mat A)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
  ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Check if isrow is a subset of iscol_local, called by MatGetSubMatrix_MPISBAIJ()
   Input: isrow       - distributed(parallel),
          iscol_local - locally owned (seq)
*/
#undef __FUNCT__
#define __FUNCT__ "ISEqual_private"
PetscErrorCode ISEqual_private(IS isrow,IS iscol_local,PetscBool *flg)
{
  PetscErrorCode ierr;
  PetscInt       sz1,sz2,*a1,*a2,i,j,k,nmatch;
  const PetscInt *ptr1,*ptr2;

  PetscFunctionBegin;
  ierr = ISGetLocalSize(isrow,&sz1);CHKERRQ(ierr);
  ierr = ISGetLocalSize(iscol_local,&sz2);CHKERRQ(ierr);
  if (sz1 > sz2) {
    *flg = PETSC_FALSE;
    PetscFunctionReturn(0);
  }

  ierr = ISGetIndices(isrow,&ptr1);CHKERRQ(ierr);
  ierr = ISGetIndices(iscol_local,&ptr2);CHKERRQ(ierr);

  ierr = PetscMalloc1(sz1,&a1);CHKERRQ(ierr);
  ierr = PetscMalloc1(sz2,&a2);CHKERRQ(ierr);
  ierr = PetscMemcpy(a1,ptr1,sz1*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMemcpy(a2,ptr2,sz2*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscSortInt(sz1,a1);CHKERRQ(ierr);
  ierr = PetscSortInt(sz2,a2);CHKERRQ(ierr);

  nmatch = 0;
  k      = 0;
  for (i=0; i<sz1; i++) {
    for (j=k; j<sz2; j++) {
      if (a1[i] == a2[j]) {
        k = j; nmatch++;
        break;
      }
    }
  }
  ierr = ISRestoreIndices(isrow,&ptr1);CHKERRQ(ierr);
  ierr = ISRestoreIndices(iscol_local,&ptr2);CHKERRQ(ierr);
  ierr = PetscFree(a1);CHKERRQ(ierr);
  ierr = PetscFree(a2);CHKERRQ(ierr);
  if (nmatch < sz1) {
    *flg = PETSC_FALSE;
  } else {
    *flg = PETSC_TRUE;
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetSubMatrix_MPISBAIJ"
PetscErrorCode MatGetSubMatrix_MPISBAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
{
  PetscErrorCode ierr;
  IS             iscol_local;
  PetscInt       csize;
  PetscBool      isequal;

  PetscFunctionBegin;
  ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
  if (call == MAT_REUSE_MATRIX) {
    ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
    if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
  } else {
    ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
    ierr = ISEqual_private(isrow,iscol_local,&isequal);CHKERRQ(ierr);
    if (!isequal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"For symmetric format, iscol must equal isrow");
  }

  /* now call MatGetSubMatrix_MPIBAIJ() */
  ierr = MatGetSubMatrix_MPIBAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
  if (call == MAT_INITIAL_MATRIX) {
    ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
    ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
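/*
   Usage note (sketch, standard API): because only the upper triangle is
   stored, submatrix extraction is restricted to symmetric index sets; the
   rows must equal the allgathered columns, as verified by ISEqual_private()
   in the routine above.

       IS  is;    // same index set for rows and columns
       Mat sub;
       MatGetSubMatrix(mat,is,is,MAT_INITIAL_MATRIX,&sub);
*/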
#undef __FUNCT__
#define __FUNCT__ "MatZeroEntries_MPISBAIJ"
PetscErrorCode MatZeroEntries_MPISBAIJ(Mat A)
{
  Mat_MPISBAIJ   *l = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetInfo_MPISBAIJ"
PetscErrorCode MatGetInfo_MPISBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)matin->data;
  Mat            A  = a->A,B = a->B;
  PetscErrorCode ierr;
  PetscReal      isend[5],irecv[5];

  PetscFunctionBegin;
  info->block_size = (PetscReal)matin->rmap->bs;

  ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);

  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;

  ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);

  isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
  isend[3] += info->memory;  isend[4] += info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(0);
}
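/*
   Caller-side sketch (standard API): MAT_GLOBAL_SUM reduces the five
   per-process counters with MPIU_SUM, MAT_GLOBAL_MAX with MPIU_MAX, and
   MAT_LOCAL reports this process only.

       MatInfo info;
       MatGetInfo(mat,MAT_GLOBAL_SUM,&info);
       PetscPrintf(PETSC_COMM_WORLD,"nz used %g, mallocs %g\n",
                   info.nz_used,info.mallocs);
*/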
#undef __FUNCT__
#define __FUNCT__ "MatSetOption_MPISBAIJ"
PetscErrorCode MatSetOption_MPISBAIJ(Mat A,MatOption op,PetscBool flg)
{
  Mat_MPISBAIJ   *a  = (Mat_MPISBAIJ*)A->data;
  Mat_SeqSBAIJ   *aA = (Mat_SeqSBAIJ*)a->A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
  case MAT_UNUSED_NONZERO_LOCATION_ERR:
  case MAT_KEEP_NONZERO_PATTERN:
  case MAT_NEW_NONZERO_LOCATION_ERR:
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
    break;
  case MAT_ROW_ORIENTED:
    a->roworiented = flg;

    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
    break;
  case MAT_NEW_DIAGONALS:
    ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = flg;
    break;
  case MAT_USE_HASH_TABLE:
    a->ht_flag = flg;
    break;
  case MAT_HERMITIAN:
    if (!A->assembled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call MatAssemblyEnd() first");
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);

    A->ops->mult = MatMult_MPISBAIJ_Hermitian;
    break;
  case MAT_SPD:
    A->spd_set = PETSC_TRUE;
    A->spd     = flg;
    if (flg) {
      A->symmetric                  = PETSC_TRUE;
      A->structurally_symmetric     = PETSC_TRUE;
      A->symmetric_set              = PETSC_TRUE;
      A->structurally_symmetric_set = PETSC_TRUE;
    }
    break;
  case MAT_SYMMETRIC:
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  case MAT_STRUCTURALLY_SYMMETRIC:
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  case MAT_SYMMETRY_ETERNAL:
    if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Matrix must be symmetric");
    ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_LOWER_TRIANGULAR:
    aA->ignore_ltriangular = flg;
    break;
  case MAT_ERROR_LOWER_TRIANGULAR:
    aA->ignore_ltriangular = flg;
    break;
  case MAT_GETROW_UPPERTRIANGULAR:
    aA->getrow_utriangular = flg;
    break;
  default:
    SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
  }
  PetscFunctionReturn(0);
}
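/*
   Option sketch (standard calls): for complex Hermitian problems the matrix
   must be assembled before MAT_HERMITIAN is set, at which point
   MatSetOption_MPISBAIJ() above installs the Hermitian multiply kernel in
   place of MatMult_MPISBAIJ:

       MatAssemblyBegin(S,MAT_FINAL_ASSEMBLY);
       MatAssemblyEnd(S,MAT_FINAL_ASSEMBLY);
       MatSetOption(S,MAT_HERMITIAN,PETSC_TRUE);
*/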
#undef __FUNCT__
#define __FUNCT__ "MatTranspose_MPISBAIJ"
PetscErrorCode MatTranspose_MPISBAIJ(Mat A,MatReuse reuse,Mat *B)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* the matrix is symmetric, so the transpose is simply a copy */
  if (reuse == MAT_INITIAL_MATRIX || *B != A) {
    ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDiagonalScale_MPISBAIJ"
PetscErrorCode MatDiagonalScale_MPISBAIJ(Mat mat,Vec ll,Vec rr)
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  Mat            a = baij->A,b = baij->B;
  PetscErrorCode ierr;
  PetscInt       nv,m,n;
  PetscBool      flg;

  PetscFunctionBegin;
  if (ll != rr) {
    ierr = VecEqual(ll,rr,&flg);CHKERRQ(ierr);
    if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be the same");
  }
  if (!ll) PetscFunctionReturn(0);

  ierr = MatGetLocalSize(mat,&m,&n);CHKERRQ(ierr);
  if (m != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"For symmetric format, local sizes %D %D must be the same",m,n);

  ierr = VecGetLocalSize(rr,&nv);CHKERRQ(ierr);
  if (nv != n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left and right vector non-conforming local size");

  ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

  /* left diagonalscale the off-diagonal part */
  ierr = (*b->ops->diagonalscale)(b,ll,NULL);CHKERRQ(ierr);

  /* scale the diagonal part */
  ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);

  /* right diagonalscale the off-diagonal part */
  ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*b->ops->diagonalscale)(b,NULL,baij->lvec);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetUnfactored_MPISBAIJ"
PetscErrorCode MatSetUnfactored_MPISBAIJ(Mat A)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDuplicate_MPISBAIJ(Mat,MatDuplicateOption,Mat*);

#undef __FUNCT__
#define __FUNCT__ "MatEqual_MPISBAIJ"
PetscErrorCode MatEqual_MPISBAIJ(Mat A,Mat B,PetscBool *flag)
{
  Mat_MPISBAIJ   *matB = (Mat_MPISBAIJ*)B->data,*matA = (Mat_MPISBAIJ*)A->data;
  Mat            a,b,c,d;
  PetscBool      flg;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  a = matA->A; b = matA->B;
  c = matB->A; d = matB->B;

  ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
  if (flg) {
    ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
  }
  ierr = MPI_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatCopy_MPISBAIJ"
PetscErrorCode MatCopy_MPISBAIJ(Mat A,Mat B,MatStructure str)
{
  PetscErrorCode ierr;
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  Mat_MPISBAIJ   *b = (Mat_MPISBAIJ*)B->data;

  PetscFunctionBegin;
  /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
  if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
    ierr = MatGetRowUpperTriangular(A);CHKERRQ(ierr);
    ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
    ierr = MatRestoreRowUpperTriangular(A);CHKERRQ(ierr);
  } else {
    ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
    ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetUp_MPISBAIJ"
PetscErrorCode MatSetUp_MPISBAIJ(Mat A)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatMPISBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAXPY_MPISBAIJ"
PetscErrorCode MatAXPY_MPISBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
{
  PetscErrorCode ierr;
  Mat_MPISBAIJ   *xx=(Mat_MPISBAIJ*)X->data,*yy=(Mat_MPISBAIJ*)Y->data;
  PetscBLASInt   bnz,one=1;
  Mat_SeqSBAIJ   *xa,*ya;
  Mat_SeqBAIJ    *xb,*yb;

  PetscFunctionBegin;
  if (str == SAME_NONZERO_PATTERN) {
    PetscScalar alpha = a;
    xa   = (Mat_SeqSBAIJ*)xx->A->data;
    ya   = (Mat_SeqSBAIJ*)yy->A->data;
    ierr = PetscBLASIntCast(xa->nz,&bnz);CHKERRQ(ierr);
    PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,xa->a,&one,ya->a,&one));
    xb   = (Mat_SeqBAIJ*)xx->B->data;
    yb   = (Mat_SeqBAIJ*)yy->B->data;
    ierr = PetscBLASIntCast(xb->nz,&bnz);CHKERRQ(ierr);
    PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,xb->a,&one,yb->a,&one));
    ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
  } else if (str == SUBSET_NONZERO_PATTERN) { /* the nonzeros of X are a subset of Y's */
    ierr = MatSetOption(X,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);
    ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
    ierr = MatSetOption(X,MAT_GETROW_UPPERTRIANGULAR,PETSC_FALSE);CHKERRQ(ierr);
  } else {
    Mat      B;
    PetscInt *nnz_d,*nnz_o,bs=Y->rmap->bs;
    if (bs != X->rmap->bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrices must have same block size");
    ierr = MatGetRowUpperTriangular(X);CHKERRQ(ierr);
    ierr = MatGetRowUpperTriangular(Y);CHKERRQ(ierr);
    ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr);
    ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr);
    ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
    ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
    ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
    ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr);
    ierr = MatSetType(B,MATMPISBAIJ);CHKERRQ(ierr);
    ierr = MatAXPYGetPreallocation_SeqSBAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
    ierr = MatAXPYGetPreallocation_MPIBAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
    ierr = MatMPISBAIJSetPreallocation(B,bs,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
    ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
    ierr = MatHeaderReplace(Y,B);CHKERRQ(ierr);
    ierr = PetscFree(nnz_d);CHKERRQ(ierr);
    ierr = PetscFree(nnz_o);CHKERRQ(ierr);
    ierr = MatRestoreRowUpperTriangular(X);CHKERRQ(ierr);
    ierr = MatRestoreRowUpperTriangular(Y);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
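/*
   Usage sketch for MatAXPY() with this implementation (X and Y are
   placeholders for assembled MATMPISBAIJ matrices; error handling
   abbreviated). SAME_NONZERO_PATTERN takes the fast BLAS path above,
   SUBSET_NONZERO_PATTERN goes through MatAXPY_Basic(), and
   DIFFERENT_NONZERO_PATTERN rebuilds Y with merged preallocation:

     ierr = MatAXPY(Y,2.0,X,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);  // Y = Y + 2*X
*/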
#undef __FUNCT__
#define __FUNCT__ "MatGetSubMatrices_MPISBAIJ"
PetscErrorCode MatGetSubMatrices_MPISBAIJ(Mat A,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat *B[])
{
  PetscErrorCode ierr;
  PetscInt       i;
  PetscBool      flg,sorted;

  PetscFunctionBegin;
  for (i = 0; i < n; i++) {
    ierr = ISSorted(irow[i],&sorted);CHKERRQ(ierr);
    if (!sorted) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row index set %d not sorted",i);
    ierr = ISSorted(icol[i],&sorted);CHKERRQ(ierr);
    if (!sorted) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Column index set %d not sorted",i);
  }
  ierr = MatGetSubMatrices_MPIBAIJ(A,n,irow,icol,scall,B);CHKERRQ(ierr);
  for (i=0; i<n; i++) {
    ierr = ISEqual(irow[i],icol[i],&flg);CHKERRQ(ierr);
    if (!flg) { /* *B[i] is non-symmetric, set flag */
      ierr = MatSetOption(*B[i],MAT_SYMMETRIC,PETSC_FALSE);CHKERRQ(ierr);
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatShift_MPISBAIJ"
PetscErrorCode MatShift_MPISBAIJ(Mat Y,PetscScalar a)
{
  PetscErrorCode ierr;
  Mat_MPISBAIJ   *maij = (Mat_MPISBAIJ*)Y->data;
  Mat_SeqSBAIJ   *aij  = (Mat_SeqSBAIJ*)maij->A->data;

  PetscFunctionBegin;
  if (!Y->preallocated) {
    ierr = MatMPISBAIJSetPreallocation(Y,Y->rmap->bs,1,NULL,0,NULL);CHKERRQ(ierr);
  } else if (!aij->nz) {
    ierr = MatSeqSBAIJSetPreallocation(maij->A,Y->rmap->bs,1,NULL);CHKERRQ(ierr);
  }
  ierr = MatShift_Basic(Y,a);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* -------------------------------------------------------------------*/
static struct _MatOps MatOps_Values = {MatSetValues_MPISBAIJ,
                                       MatGetRow_MPISBAIJ,
                                       MatRestoreRow_MPISBAIJ,
                                       MatMult_MPISBAIJ,
                               /* 4*/  MatMultAdd_MPISBAIJ,
                                       MatMult_MPISBAIJ,       /* transpose versions are same as non-transpose */
                                       MatMultAdd_MPISBAIJ,
                                       0,
                                       0,
                                       0,
                               /* 10*/ 0,
                                       0,
                                       0,
                                       MatSOR_MPISBAIJ,
                                       MatTranspose_MPISBAIJ,
                               /* 15*/ MatGetInfo_MPISBAIJ,
                                       MatEqual_MPISBAIJ,
                                       MatGetDiagonal_MPISBAIJ,
                                       MatDiagonalScale_MPISBAIJ,
                                       MatNorm_MPISBAIJ,
                               /* 20*/ MatAssemblyBegin_MPISBAIJ,
                                       MatAssemblyEnd_MPISBAIJ,
                                       MatSetOption_MPISBAIJ,
                                       MatZeroEntries_MPISBAIJ,
                               /* 24*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                               /* 29*/ MatSetUp_MPISBAIJ,
                                       0,
                                       0,
                                       0,
                                       0,
                               /* 34*/ MatDuplicate_MPISBAIJ,
                                       0,
                                       0,
                                       0,
                                       0,
                               /* 39*/ MatAXPY_MPISBAIJ,
                                       MatGetSubMatrices_MPISBAIJ,
                                       MatIncreaseOverlap_MPISBAIJ,
                                       MatGetValues_MPISBAIJ,
                                       MatCopy_MPISBAIJ,
                               /* 44*/ 0,
                                       MatScale_MPISBAIJ,
                                       MatShift_MPISBAIJ,
                                       0,
                                       0,
                               /* 49*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                               /* 54*/ 0,
                                       0,
                                       MatSetUnfactored_MPISBAIJ,
                                       0,
                                       MatSetValuesBlocked_MPISBAIJ,
                               /* 59*/ MatGetSubMatrix_MPISBAIJ,
                                       0,
                                       0,
                                       0,
                                       0,
                               /* 64*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                               /* 69*/ MatGetRowMaxAbs_MPISBAIJ,
                                       0,
                                       0,
                                       0,
                                       0,
                               /* 74*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                               /* 79*/ 0,
                                       0,
                                       0,
                                       0,
                                       MatLoad_MPISBAIJ,
                               /* 84*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                               /* 89*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                               /* 94*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                               /* 99*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                               /*104*/ 0,
                                       MatRealPart_MPISBAIJ,
                                       MatImaginaryPart_MPISBAIJ,
                                       MatGetRowUpperTriangular_MPISBAIJ,
                                       MatRestoreRowUpperTriangular_MPISBAIJ,
                               /*109*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                               /*114*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                               /*119*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                               /*124*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                               /*129*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                               /*134*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                               /*139*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                               /*144*/ MatCreateMPIMatConcatenateSeqMat_MPISBAIJ
};

#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonalBlock_MPISBAIJ"
PetscErrorCode MatGetDiagonalBlock_MPISBAIJ(Mat A,Mat *a)
{
  PetscFunctionBegin;
  *a = ((Mat_MPISBAIJ*)A->data)->A;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMPISBAIJSetPreallocation_MPISBAIJ"
PetscErrorCode MatMPISBAIJSetPreallocation_MPISBAIJ(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt *d_nnz,PetscInt o_nz,const PetscInt *o_nnz)
{
  Mat_MPISBAIJ   *b;
  PetscErrorCode ierr;
  PetscInt       i,mbs,Mbs;

  PetscFunctionBegin;
  ierr = MatSetBlockSize(B,PetscAbs(bs));CHKERRQ(ierr);
  ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
  ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);

  b   = (Mat_MPISBAIJ*)B->data;
  mbs = B->rmap->n/bs;
  Mbs = B->rmap->N/bs;
  if (mbs*bs != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"No of local rows %D must be divisible by blocksize %D",B->rmap->n,bs);

  B->rmap->bs = bs;
  b->bs2      = bs*bs;
  b->mbs      = mbs;
  b->Mbs      = Mbs;
  b->nbs      = B->cmap->n/bs;
  b->Nbs      = B->cmap->N/bs;

  for (i=0; i<=b->size; i++) {
    b->rangebs[i] = B->rmap->range[i]/bs;
  }
  b->rstartbs = B->rmap->rstart/bs;
  b->rendbs   = B->rmap->rend/bs;

  b->cstartbs = B->cmap->rstart/bs;
  b->cendbs   = B->cmap->rend/bs;

  if (!B->preallocated) {
    ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
    ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
    ierr = MatSetType(b->A,MATSEQSBAIJ);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr);
    ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
    ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
    ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr);
    ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),bs,&B->bstash);CHKERRQ(ierr);
  }

  ierr = MatSeqSBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr);
  ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr);

  B->preallocated = PETSC_TRUE;
  PetscFunctionReturn(0);
}
#undef __FUNCT__
#define __FUNCT__ "MatMPISBAIJSetPreallocationCSR_MPISBAIJ"
PetscErrorCode MatMPISBAIJSetPreallocationCSR_MPISBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
{
  PetscInt       m,rstart,cstart,cend;
  PetscInt       i,j,d,nz,nz_max=0,*d_nnz=0,*o_nnz=0;
  const PetscInt *JJ    =0;
  PetscScalar    *values=0;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (bs < 1) SETERRQ1(PetscObjectComm((PetscObject)B),PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs);
  ierr   = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
  ierr   = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
  ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
  ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
  ierr   = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
  m      = B->rmap->n/bs;
  rstart = B->rmap->rstart/bs;
  cstart = B->cmap->rstart/bs;
  cend   = B->cmap->rend/bs;

  if (ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %D",ii[0]);
  ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr);
  for (i=0; i<m; i++) {
    nz = ii[i+1] - ii[i];
    if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nz);
    nz_max = PetscMax(nz_max,nz);
    JJ     = jj + ii[i];
    /* skip the block columns to the left of the diagonal block */
    for (j=0; j<nz; j++) {
      if (*JJ >= cstart) break;
      JJ++;
    }
    /* count the block columns in the diagonal block */
    d = 0;
    for (; j<nz; j++) {
      if (*JJ++ >= cend) break;
      d++;
    }
    d_nnz[i] = d;
    o_nnz[i] = nz - d;
  }
  ierr = MatMPISBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
  ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);

  values = (PetscScalar*)V;
  if (!values) {
    ierr = PetscMalloc1(bs*bs*nz_max,&values);CHKERRQ(ierr);
    ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr);
  }
  for (i=0; i<m; i++) {
    PetscInt          row    = i + rstart;
    PetscInt          ncols  = ii[i+1] - ii[i];
    const PetscInt    *icols = jj + ii[i];
    const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
    ierr = MatSetValuesBlocked_MPISBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr);
  }

  if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); }
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*MC
   MATMPISBAIJ - MATMPISBAIJ = "mpisbaij" - A matrix type to be used for distributed symmetric sparse block matrices,
   based on the block compressed sparse row format.  Only the upper triangular portion of the "diagonal" portion of
   the matrix is stored.

   For complex numbers this matrix is by default symmetric, NOT Hermitian. To make it Hermitian you
   can call MatSetOption(mat,MAT_HERMITIAN,PETSC_TRUE) after the matrix has been assembled.

   Options Database Keys:
. -mat_type mpisbaij - sets the matrix type to "mpisbaij" during a call to MatSetFromOptions()

   Level: beginner

.seealso: MatCreateMPISBAIJ
M*/
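/*
   A minimal creation sketch for this type (M, bs, d_nz and o_nz are
   placeholders; error handling abbreviated):

     Mat A;
     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,M);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
     ierr = MatMPISBAIJSetPreallocation(A,bs,d_nz,NULL,o_nz,NULL);CHKERRQ(ierr);
*/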
PETSC_EXTERN PetscErrorCode MatConvert_MPISBAIJ_MPISBSTRM(Mat,MatType,MatReuse,Mat*);

#undef __FUNCT__
#define __FUNCT__ "MatCreate_MPISBAIJ"
PETSC_EXTERN PetscErrorCode MatCreate_MPISBAIJ(Mat B)
{
  Mat_MPISBAIJ   *b;
  PetscErrorCode ierr;
  PetscBool      flg = PETSC_FALSE;

  PetscFunctionBegin;
  ierr    = PetscNewLog(B,&b);CHKERRQ(ierr);
  B->data = (void*)b;
  ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);

  B->ops->destroy = MatDestroy_MPISBAIJ;
  B->ops->view    = MatView_MPISBAIJ;
  B->assembled    = PETSC_FALSE;
  B->insertmode   = NOT_SET_VALUES;

  ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&b->size);CHKERRQ(ierr);

  /* build local table of row and column ownerships */
  ierr = PetscMalloc1(b->size+2,&b->rangebs);CHKERRQ(ierr);

  /* build cache for off array entries formed */
  ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);

  b->donotstash  = PETSC_FALSE;
  b->colmap      = NULL;
  b->garray      = NULL;
  b->roworiented = PETSC_TRUE;

  /* stuff used in block assembly */
  b->barray = 0;

  /* stuff used for matrix vector multiply */
  b->lvec    = 0;
  b->Mvctx   = 0;
  b->slvec0  = 0;
  b->slvec0b = 0;
  b->slvec1  = 0;
  b->slvec1a = 0;
  b->slvec1b = 0;
  b->sMvctx  = 0;

  /* stuff for MatGetRow() */
  b->rowindices   = 0;
  b->rowvalues    = 0;
  b->getrowactive = PETSC_FALSE;

  /* hash table stuff */
  b->ht           = 0;
  b->hd           = 0;
  b->ht_size      = 0;
  b->ht_flag      = PETSC_FALSE;
  b->ht_fact      = 0;
  b->ht_total_ct  = 0;
  b->ht_insert_ct = 0;

  /* stuff for MatGetSubMatrices_MPIBAIJ_local() */
  b->ijonly = PETSC_FALSE;

  b->in_loc = 0;
  b->v_loc  = 0;
  b->n_loc  = 0;

  ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPISBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPISBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPISBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPISBAIJSetPreallocation_C",MatMPISBAIJSetPreallocation_MPISBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPISBAIJSetPreallocationCSR_C",MatMPISBAIJSetPreallocationCSR_MPISBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpisbaij_mpisbstrm_C",MatConvert_MPISBAIJ_MPISBSTRM);CHKERRQ(ierr);
#if defined(PETSC_HAVE_ELEMENTAL)
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpisbaij_elemental_C",MatConvert_MPISBAIJ_Elemental);CHKERRQ(ierr);
#endif

  B->symmetric                  = PETSC_TRUE;
  B->structurally_symmetric     = PETSC_TRUE;
  B->symmetric_set              = PETSC_TRUE;
  B->structurally_symmetric_set = PETSC_TRUE;

  ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPISBAIJ);CHKERRQ(ierr);
  ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Options for loading MPISBAIJ matrix 1","Mat");CHKERRQ(ierr);
  ierr = PetscOptionsBool("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",flg,&flg,NULL);CHKERRQ(ierr);
  if (flg) {
    PetscReal fact = 1.39;
    ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr);
    ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,NULL);CHKERRQ(ierr);
    if (fact <= 1.0) fact = 1.39;
    ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
    ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr);
  }
  ierr = PetscOptionsEnd();CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*MC
   MATSBAIJ - MATSBAIJ = "sbaij" - A matrix type to be used for symmetric block sparse matrices.

   This matrix type is identical to MATSEQSBAIJ when constructed with a single process communicator,
   and MATMPISBAIJ otherwise.

   Options Database Keys:
. -mat_type sbaij - sets the matrix type to "sbaij" during a call to MatSetFromOptions()

   Level: beginner

.seealso: MatCreateMPISBAIJ,MATSEQSBAIJ,MATMPISBAIJ
M*/

#undef __FUNCT__
#define __FUNCT__ "MatMPISBAIJSetPreallocation"
/*@C
   MatMPISBAIJSetPreallocation - For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective on Mat

   Input Parameters:
+  B - the matrix
.  bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
       blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
.  d_nz - number of block nonzeros per block row in diagonal portion of local
          submatrix (same for all local rows)
.  d_nnz - array containing the number of block nonzeros in the various block rows
           in the upper triangular and diagonal part of the diagonal portion of the local
           submatrix (possibly different for each block row) or NULL. If you plan to factor the matrix you must leave room
           for the diagonal entry and set a value even if it is zero.
.  o_nz - number of block nonzeros per block row in the off-diagonal portion of local
          submatrix (same for all local rows).
-  o_nnz - array containing the number of block nonzeros in the various block rows of the
           off-diagonal portion of the local submatrix that is right of the diagonal
           (possibly different for each block row) or NULL.

   Options Database Keys:
.   -mat_no_unroll - uses code that does not unroll the loops in the
                     block calculations (much slower)
.   -mat_block_size - size of the blocks to use

   Notes:

   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
   then it must be used on all processors that share the object for that argument.

   If the *_nnz parameter is given then the *_nz parameter is ignored

   Storage Information:
   For a square global matrix we define each processor's diagonal portion
   to be its local rows and the corresponding columns (a square submatrix);
   each processor's off-diagonal portion encompasses the remainder of the
   local matrix (a rectangular submatrix).

   The user can specify preallocated storage for the diagonal part of
   the local submatrix with either d_nz or d_nnz (not both).  Set
   d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
   memory allocation.  Likewise, specify preallocated storage for the
   off-diagonal part of the local submatrix with o_nz or o_nnz (not both).

   You can call MatGetInfo() to get information on how effective the preallocation was;
   for example, the fields mallocs, nz_allocated, nz_used, and nz_unneeded.
   You can also run with the option -info and look for messages with the string
   malloc in them to see if additional memory allocation was needed.

   Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
   the figure below we depict these three local rows and all columns (0-11).

.vb
           0 1 2 3 4 5 6 7 8 9 10 11
          --------------------------
   row 3  |. . . d d d o o o o  o  o
   row 4  |. . . d d d o o o o  o  o
   row 5  |. . . d d d o o o o  o  o
          --------------------------
.ve

   Thus, any entries in the d locations are stored in the d (diagonal)
   submatrix, and any entries in the o locations are stored in the
   o (off-diagonal) submatrix.  Note that the d matrix is stored in
   MATSEQSBAIJ format and the o submatrix in MATSEQBAIJ format.

   Now d_nz should indicate the number of block nonzeros per row in the upper triangular
   plus the diagonal part of the d matrix,
   and o_nz should indicate the number of block nonzeros per row in the o matrix.

   In general, for PDE problems in which most nonzeros are near the diagonal,
   one expects d_nz >> o_nz.  For large problems you MUST preallocate memory
   or you will get TERRIBLE performance; see the users' manual chapter on
   matrices.

   Level: intermediate

.keywords: matrix, block, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateBAIJ(), PetscSplitOwnership()
@*/
PetscErrorCode MatMPISBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(B,MAT_CLASSID,1);
  PetscValidType(B,1);
  PetscValidLogicalCollectiveInt(B,bs,2);
  ierr = PetscTryMethod(B,"MatMPISBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,bs,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
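/*
   Usage sketch following the storage discussion above (m is a placeholder
   local size and the counts are made-up sample values; error handling
   abbreviated). With bs = 2, here three blocks per block row are reserved
   in the upper-triangular/diagonal d part and one block per block row in
   the o part:

     Mat B;
     ierr = MatCreate(PETSC_COMM_WORLD,&B);CHKERRQ(ierr);
     ierr = MatSetSizes(B,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
     ierr = MatSetType(B,MATMPISBAIJ);CHKERRQ(ierr);
     ierr = MatMPISBAIJSetPreallocation(B,2,3,NULL,1,NULL);CHKERRQ(ierr);
*/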
#undef __FUNCT__
#define __FUNCT__ "MatCreateSBAIJ"
/*@C
   MatCreateSBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format
   (block compressed row).  For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
       blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
.  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
      This value should be the same as the local size used in creating the
      y vector for the matrix-vector product y = Ax.
.  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
      This value should be the same as the local size used in creating the
      x vector for the matrix-vector product y = Ax.
.  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.  d_nz - number of block nonzeros per block row in diagonal portion of local
          submatrix (same for all local rows)
.  d_nnz - array containing the number of block nonzeros in the various block rows
           in the upper triangular portion of the diagonal portion of the local
           submatrix (possibly different for each block row) or NULL.
           If you plan to factor the matrix you must leave room for the diagonal entry and
           set its value even if it is zero.
.  o_nz - number of block nonzeros per block row in the off-diagonal portion of local
          submatrix (same for all local rows).
-  o_nnz - array containing the number of nonzeros in the various block rows of the
           off-diagonal portion of the local submatrix (possibly different for
           each block row) or NULL.

   Output Parameter:
.  A - the matrix

   Options Database Keys:
.   -mat_no_unroll - uses code that does not unroll the loops in the
                     block calculations (much slower)
.   -mat_block_size - size of the blocks to use
.   -mat_mpi - use the parallel matrix data structures even on one processor
               (defaults to using SeqBAIJ format on one processor)

   It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
   MatXXXXSetPreallocation() paradigm instead of this routine directly.
   [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]

   Notes:
   The number of rows and columns must be divisible by blocksize.
   This matrix type does not support complex Hermitian operation.

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
   then it must be used on all processors that share the object for that argument.

   If the *_nnz parameter is given then the *_nz parameter is ignored

   Storage Information:
   For a square global matrix we define each processor's diagonal portion
   to be its local rows and the corresponding columns (a square submatrix);
   each processor's off-diagonal portion encompasses the remainder of the
   local matrix (a rectangular submatrix).

   The user can specify preallocated storage for the diagonal part of
   the local submatrix with either d_nz or d_nnz (not both).  Set
   d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
   memory allocation.  Likewise, specify preallocated storage for the
   off-diagonal part of the local submatrix with o_nz or o_nnz (not both).

   Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
   the figure below we depict these three local rows and all columns (0-11).

.vb
           0 1 2 3 4 5 6 7 8 9 10 11
          --------------------------
   row 3  |. . . d d d o o o o  o  o
   row 4  |. . . d d d o o o o  o  o
   row 5  |. . . d d d o o o o  o  o
          --------------------------
.ve

   Thus, any entries in the d locations are stored in the d (diagonal)
   submatrix, and any entries in the o locations are stored in the
   o (off-diagonal) submatrix.  Note that the d matrix is stored in
   MATSEQSBAIJ format and the o submatrix in MATSEQBAIJ format.

   Now d_nz should indicate the number of block nonzeros per row in the upper triangular
   plus the diagonal part of the d matrix,
   and o_nz should indicate the number of block nonzeros per row in the o matrix.
   In general, for PDE problems in which most nonzeros are near the diagonal,
   one expects d_nz >> o_nz.  For large problems you MUST preallocate memory
   or you will get TERRIBLE performance; see the users' manual chapter on
   matrices.

   Level: intermediate

.keywords: matrix, block, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateBAIJ()
@*/
PetscErrorCode MatCreateSBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
{
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MatCreate(comm,A);CHKERRQ(ierr);
  ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size > 1) {
    ierr = MatSetType(*A,MATMPISBAIJ);CHKERRQ(ierr);
    ierr = MatMPISBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
  } else {
    ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr);
    ierr = MatSeqSBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
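/*
   Usage sketch for MatCreateSBAIJ() (M and the nonzero counts are made-up
   sample values; error handling abbreviated):

     Mat A;
     ierr = MatCreateSBAIJ(PETSC_COMM_WORLD,2,PETSC_DECIDE,PETSC_DECIDE,M,M,
                           3,NULL,1,NULL,&A);CHKERRQ(ierr);

   As recommended above, the equivalent MatCreate()/MatSetType()/
   MatMPISBAIJSetPreallocation() sequence is the preferred way to do this.
*/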
#undef __FUNCT__
#define __FUNCT__ "MatDuplicate_MPISBAIJ"
static PetscErrorCode MatDuplicate_MPISBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
{
  Mat            mat;
  Mat_MPISBAIJ   *a,*oldmat = (Mat_MPISBAIJ*)matin->data;
  PetscErrorCode ierr;
  PetscInt       len=0,nt,bs=matin->rmap->bs,mbs=oldmat->mbs;
  PetscScalar    *array;

  PetscFunctionBegin;
  *newmat = 0;

  ierr = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
  ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
  ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
  ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
  ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);

  mat->factortype   = matin->factortype;
  mat->preallocated = PETSC_TRUE;
  mat->assembled    = PETSC_TRUE;
  mat->insertmode   = NOT_SET_VALUES;

  a      = (Mat_MPISBAIJ*)mat->data;
  a->bs2 = oldmat->bs2;
  a->mbs = oldmat->mbs;
  a->nbs = oldmat->nbs;
  a->Mbs = oldmat->Mbs;
  a->Nbs = oldmat->Nbs;

  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  a->donotstash   = oldmat->donotstash;
  a->roworiented  = oldmat->roworiented;
  a->rowindices   = 0;
  a->rowvalues    = 0;
  a->getrowactive = PETSC_FALSE;
  a->barray       = 0;
  a->rstartbs     = oldmat->rstartbs;
  a->rendbs       = oldmat->rendbs;
  a->cstartbs     = oldmat->cstartbs;
  a->cendbs       = oldmat->cendbs;

  /* hash table stuff */
  a->ht           = 0;
  a->hd           = 0;
  a->ht_size      = 0;
  a->ht_flag      = oldmat->ht_flag;
  a->ht_fact      = oldmat->ht_fact;
  a->ht_total_ct  = 0;
  a->ht_insert_ct = 0;

  ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+2)*sizeof(PetscInt));CHKERRQ(ierr);
  if (oldmat->colmap) {
#if defined(PETSC_USE_CTABLE)
    ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
#else
    ierr = PetscMalloc1(a->Nbs,&a->colmap);CHKERRQ(ierr);
    ierr = PetscLogObjectMemory((PetscObject)mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
    ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
#endif
  } else a->colmap = 0;

  if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
    ierr = PetscMalloc1(len,&a->garray);CHKERRQ(ierr);
    ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr);
    ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr);
  } else a->garray = 0;

  ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)matin),matin->rmap->bs,&mat->bstash);CHKERRQ(ierr);
  ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr);
  ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr);

  ierr = VecDuplicate(oldmat->slvec0,&a->slvec0);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0);CHKERRQ(ierr);
  ierr = VecDuplicate(oldmat->slvec1,&a->slvec1);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1);CHKERRQ(ierr);

  ierr = VecGetLocalSize(a->slvec1,&nt);CHKERRQ(ierr);
  ierr = VecGetArray(a->slvec1,&array);CHKERRQ(ierr);
  ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,bs*mbs,array,&a->slvec1a);CHKERRQ(ierr);
  ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,nt-bs*mbs,array+bs*mbs,&a->slvec1b);CHKERRQ(ierr);
  ierr = VecRestoreArray(a->slvec1,&array);CHKERRQ(ierr);
  ierr = VecGetArray(a->slvec0,&array);CHKERRQ(ierr);
  ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,nt-bs*mbs,array+bs*mbs,&a->slvec0b);CHKERRQ(ierr);
  ierr = VecRestoreArray(a->slvec0,&array);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0b);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1a);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1b);CHKERRQ(ierr);

  /* ierr = VecScatterCopy(oldmat->sMvctx,&a->sMvctx); - not written yet, replaced by the lazy trick: */
  ierr      = PetscObjectReference((PetscObject)oldmat->sMvctx);CHKERRQ(ierr);
  a->sMvctx = oldmat->sMvctx;
  ierr      = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->sMvctx);CHKERRQ(ierr);

  ierr    = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
  ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
  ierr    = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
  ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr);
  ierr    = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
  *newmat = mat;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatLoad_MPISBAIJ"
PetscErrorCode MatLoad_MPISBAIJ(Mat newmat,PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscInt       i,nz,j,rstart,rend;
  PetscScalar    *vals,*buf;
  MPI_Comm       comm;
  MPI_Status     status;
  PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,*sndcounts = 0,*browners,maxnz,*rowners,mmbs;
  PetscInt       header[4],*rowlengths = 0,M,N,m,*cols,*locrowlens;
  PetscInt       *procsnz = 0,jj,*mycols,*ibuf;
  PetscInt       bs = newmat->rmap->bs,Mbs,mbs,extra_rows;
  PetscInt       *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount;
  PetscInt       dcount,kmax,k,nzcount,tmp;
  int            fd;

  PetscFunctionBegin;
  /* force binary viewer to load .info file if it has not yet done so */
  ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
  ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
  ierr = PetscOptionsBegin(comm,NULL,"Options for loading MPISBAIJ matrix 2","Mat");CHKERRQ(ierr);
  ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr);
  ierr = PetscOptionsEnd();CHKERRQ(ierr);
  if (bs < 0) bs = 1;

  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr);
    if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
    if (header[3] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as MPISBAIJ");
  }

  ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
  M    = header[1];
  N    = header[2];

  /* If global sizes are set, check if they are consistent with those given in the file */
  if (newmat->rmap->N >= 0 && newmat->rmap->N != M) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of rows: Matrix in file has (%D) and input matrix has (%D)",newmat->rmap->N,M);
  if (newmat->cmap->N >= 0 && newmat->cmap->N != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of cols: Matrix in file has (%D) and input matrix has (%D)",newmat->cmap->N,N);

  if (M != N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only do square matrices");

  /*
     This code adds extra rows to make sure the number of rows is
     divisible by the blocksize
  */
  Mbs        = M/bs;
  extra_rows = bs - M + bs*Mbs;
  if (extra_rows == bs) extra_rows = 0;
  else                  Mbs++;
  if (extra_rows && !rank) {
    ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
  }

  /* determine ownership of all rows */
  if (newmat->rmap->n < 0) { /* PETSC_DECIDE */
    mbs = Mbs/size + ((Mbs % size) > rank);
    m   = mbs*bs;
  } else { /* user set */
    m   = newmat->rmap->n;
    mbs = m/bs;
  }
  ierr = PetscMalloc2(size+1,&rowners,size+1,&browners);CHKERRQ(ierr);
  ierr = PetscMPIIntCast(mbs,&mmbs);CHKERRQ(ierr);
  ierr = MPI_Allgather(&mmbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  rowners[0] = 0;
  for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
  for (i=0; i<=size; i++) browners[i] = rowners[i]*bs;
  rstart = rowners[rank];
  rend   = rowners[rank+1];

  /* distribute row lengths to all processors */
  ierr = PetscMalloc1((rend-rstart)*bs,&locrowlens);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscMalloc1(M+extra_rows,&rowlengths);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
    for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
    ierr = PetscMalloc1(size,&sndcounts);CHKERRQ(ierr);
    for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i];
    ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr);
    ierr = PetscFree(sndcounts);CHKERRQ(ierr);
  } else {
    ierr = MPI_Scatterv(0,0,0,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr);
  }

  if (!rank) {   /* procs[0] */
    /* calculate the number of nonzeros on each processor */
    ierr = PetscMalloc1(size,&procsnz);CHKERRQ(ierr);
    ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
    for (i=0; i<size; i++) {
      for (j=rowners[i]*bs; j<rowners[i+1]*bs; j++) {
        procsnz[i] += rowlengths[j];
      }
    }
    ierr = PetscFree(rowlengths);CHKERRQ(ierr);

    /* determine max buffer needed and allocate it */
    maxnz = 0;
    for (i=0; i<size; i++) {
      maxnz = PetscMax(maxnz,procsnz[i]);
    }
    ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr);

    /* read in my part of the matrix column indices */
    nz     = procsnz[0];
    ierr   = PetscMalloc1(nz,&ibuf);CHKERRQ(ierr);
    mycols = ibuf;
    if (size == 1) nz -= extra_rows;
    ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
    if (size == 1) {
      for (i=0; i<extra_rows; i++) mycols[nz+i] = M+i;
    }

    /* read in everyone else's column indices (except the last proc's) and ship them off */
    for (i=1; i<size-1; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
      ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
    }
    /* read in the stuff for the last proc */
    if (size != 1) {
      nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
      ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
      for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
      ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(cols);CHKERRQ(ierr);
  } else {  /* procs[i], i>0 */
    /* determine buffer space needed for message */
    nz = 0;
    for (i=0; i<m; i++) nz += locrowlens[i];
    ierr   = PetscMalloc1(nz,&ibuf);CHKERRQ(ierr);
    mycols = ibuf;
    /* receive message of column indices */
    ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
  }

  /* loop over local rows, determining number of off-diagonal entries */
  ierr     = PetscMalloc2(rend-rstart,&dlens,rend-rstart,&odlens);CHKERRQ(ierr);
  ierr     = PetscMalloc3(Mbs,&mask,Mbs,&masked1,Mbs,&masked2);CHKERRQ(ierr);
  ierr     = PetscMemzero(mask,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
  ierr     = PetscMemzero(masked1,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
  ierr     = PetscMemzero(masked2,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
  rowcount = 0;
  nzcount  = 0;
  for (i=0; i<mbs; i++) {
    dcount  = 0;
    odcount = 0;
    for (j=0; j<bs; j++) {
      kmax = locrowlens[rowcount];
      for (k=0; k<kmax; k++) {
        tmp = mycols[nzcount++]/bs; /* block col. index */
        if (!mask[tmp]) {
          mask[tmp] = 1;
          if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; /* entry in off-diag portion */
          else masked1[dcount++] = tmp;                              /* entry in diag portion */
        }
      }
      rowcount++;
    }

    dlens[i]  = dcount;  /* d_nzz[i] */
    odlens[i] = odcount; /* o_nzz[i] */

    /* zero out the mask elements we set */
    for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
    for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
  }
  ierr = MatSetSizes(newmat,m,m,M+extra_rows,N+extra_rows);CHKERRQ(ierr);
  ierr = MatMPISBAIJSetPreallocation(newmat,bs,0,dlens,0,odlens);CHKERRQ(ierr);
  ierr = MatSetOption(newmat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);

  if (!rank) {
    ierr = PetscMalloc1(maxnz,&buf);CHKERRQ(ierr);
    /* read in my part of the matrix numerical values */
    nz     = procsnz[0];
    vals   = buf;
    mycols = ibuf;
    if (size == 1) nz -= extra_rows;
    ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
    if (size == 1) {
      for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
    }

    /* insert into matrix */
    jj = rstart*bs;
    for (i=0; i<m; i++) {
      ierr    = MatSetValues(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
      mycols += locrowlens[i];
      vals   += locrowlens[i];
      jj++;
    }

    /* read in other processors' values (except the last one's) and ship them off */
    for (i=1; i<size-1; i++) {
      nz   = procsnz[i];
      vals = buf;
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
    }
    /* the last proc */
    if (size != 1) {
      nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
      vals = buf;
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
      ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(procsnz);CHKERRQ(ierr);
  } else {
    /* receive numeric values */
    ierr = PetscMalloc1(nz,&buf);CHKERRQ(ierr);

    /* receive message of values */
    vals   = buf;
    mycols = ibuf;
    ierr   = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm,&status);CHKERRQ(ierr);
    ierr   = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");

    /* insert into matrix */
    jj = rstart*bs;
    for (i=0; i<m; i++) {
      ierr    = MatSetValues_MPISBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
      mycols += locrowlens[i];
      vals   += locrowlens[i];
      jj++;
    }
  }

  ierr = PetscFree(locrowlens);CHKERRQ(ierr);
  ierr = PetscFree(buf);CHKERRQ(ierr);
  ierr = PetscFree(ibuf);CHKERRQ(ierr);
  ierr = PetscFree2(rowners,browners);CHKERRQ(ierr);
  ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr);
  ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
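/*
   Usage sketch: the routine above is reached through MatLoad() on a binary
   viewer ("matrix.dat" is a placeholder file name; error handling
   abbreviated). The block size may be chosen at load time with
   -matload_block_size <bs>:

     Mat         A;
     PetscViewer viewer;
     ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"matrix.dat",FILE_MODE_READ,&viewer);CHKERRQ(ierr);
     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
     ierr = MatLoad(A,viewer);CHKERRQ(ierr);
     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
*/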
#undef __FUNCT__
#define __FUNCT__ "MatMPISBAIJSetHashTableFactor"
/*XXXXX@
   MatMPISBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.

   Not Collective on Mat, each process can have a different hash factor

   Input Parameters:
+  mat  - the matrix
-  fact - factor

   Level: advanced

   Notes:
   This can also be set by the command line option: -mat_use_hash_table fact

.keywords: matrix, hashtable, factor, HT

.seealso: MatSetOption()
@XXXXX*/

#undef __FUNCT__
#define __FUNCT__ "MatGetRowMaxAbs_MPISBAIJ"
PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat A,Vec v,PetscInt idx[])
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  Mat_SeqBAIJ    *b = (Mat_SeqBAIJ*)(a->B)->data;
  PetscReal      atmp;
  PetscReal      *work,*svalues,*rvalues;
  PetscErrorCode ierr;
  PetscInt       i,bs,mbs,*bi,*bj,brow,j,ncols,krow,kcol,col,row,Mbs,bcol;
  PetscMPIInt    rank,size;
  PetscInt       *rowners_bs,dest,count,source;
  PetscScalar    *va;
  MatScalar      *ba;
  MPI_Status     stat;

  PetscFunctionBegin;
  if (idx) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Send email to petsc-maint@mcs.anl.gov");
  ierr = MatGetRowMaxAbs(a->A,v,NULL);CHKERRQ(ierr);
  ierr = VecGetArray(v,&va);CHKERRQ(ierr);

  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr);

  bs  = A->rmap->bs;
  mbs = a->mbs;
  Mbs = a->Mbs;
  ba  = b->a;
  bi  = b->i;
  bj  = b->j;

  /* find ownerships */
  rowners_bs = A->rmap->range;

  /* each proc creates an array to be distributed */
  ierr = PetscMalloc1(bs*Mbs,&work);CHKERRQ(ierr);
  ierr = PetscMemzero(work,bs*Mbs*sizeof(PetscReal));CHKERRQ(ierr);

  /* row_max for B */
  if (rank != size-1) {
    for (i=0; i<mbs; i++) {
      ncols = bi[1] - bi[0]; bi++;
      brow  = bs*i;
      for (j=0; j<ncols; j++) {
        bcol = bs*(*bj);
        for (kcol=0; kcol<bs; kcol++) {
          col  = bcol + kcol;        /* local col index */
          col += rowners_bs[rank+1]; /* global col index */
          for (krow=0; krow<bs; krow++) {
            atmp = PetscAbsScalar(*ba); ba++;
            row  = brow + krow;      /* local row index */
            if (PetscRealPart(va[row]) < atmp) va[row] = atmp;
            if (work[col] < atmp) work[col] = atmp;
          }
        }
        bj++;
      }
    }

    /* send values to their owners */
    for (dest=rank+1; dest<size; dest++) {
      svalues = work + rowners_bs[dest];
      count   = rowners_bs[dest+1] - rowners_bs[dest];
      ierr    = MPI_Send(svalues,count,MPIU_REAL,dest,rank,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
    }
  }

  /* receive values */
  if (rank) {
    rvalues = work;
    count   = rowners_bs[rank+1] - rowners_bs[rank];
    for (source=0; source<rank; source++) {
      ierr = MPI_Recv(rvalues,count,MPIU_REAL,MPI_ANY_SOURCE,MPI_ANY_TAG,PetscObjectComm((PetscObject)A),&stat);CHKERRQ(ierr);
      /* process values */
      for (i=0; i<count; i++) {
        if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i];
      }
    }
  }

  ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
  ierr = PetscFree(work);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
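/*
   Usage sketch: callers reach the routine above through MatGetRowMaxAbs();
   note that the idx argument must be NULL for this implementation (A is a
   placeholder for an assembled MATMPISBAIJ matrix; error handling
   abbreviated):

     Vec v;
     ierr = MatCreateVecs(A,NULL,&v);CHKERRQ(ierr);   // v sized like the rows
     ierr = MatGetRowMaxAbs(A,v,NULL);CHKERRQ(ierr);
*/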
#undef __FUNCT__
#define __FUNCT__ "MatSOR_MPISBAIJ"
PetscErrorCode MatSOR_MPISBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
{
  Mat_MPISBAIJ      *mat = (Mat_MPISBAIJ*)matin->data;
  PetscErrorCode    ierr;
  PetscInt          mbs=mat->mbs,bs=matin->rmap->bs;
  PetscScalar       *x,*ptr,*from;
  Vec               bb1;
  const PetscScalar *b;

  PetscFunctionBegin;
  if (its <= 0 || lits <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
  if (bs > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");

  if (flag == SOR_APPLY_UPPER) {
    ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
      its--;
    }

    ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
    while (its--) {
      /* lower triangular part: slvec0b = - B^T*xx */
      ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr);

      /* copy xx into slvec0a */
      ierr = VecGetArray(mat->slvec0,&ptr);CHKERRQ(ierr);
      ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
      ierr = PetscMemcpy(ptr,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
      ierr = VecRestoreArray(mat->slvec0,&ptr);CHKERRQ(ierr);

      ierr = VecScale(mat->slvec0,-1.0);CHKERRQ(ierr);

      /* copy bb into slvec1a */
      ierr = VecGetArray(mat->slvec1,&ptr);CHKERRQ(ierr);
      ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
      ierr = PetscMemcpy(ptr,b,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
      ierr = VecRestoreArray(mat->slvec1,&ptr);CHKERRQ(ierr);

      /* set slvec1b = 0 */
      ierr = VecSet(mat->slvec1b,0.0);CHKERRQ(ierr);

      ierr = VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
      ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
      ierr = VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      /* upper triangular part: bb1 = bb1 - B*x */
      ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,bb1);CHKERRQ(ierr);

      /* local diagonal sweep */
      ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
    }
    ierr = VecDestroy(&bb1);CHKERRQ(ierr);
  } else if ((flag & SOR_LOCAL_FORWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) {
    ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
  } else if ((flag & SOR_LOCAL_BACKWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) {
    ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
  } else if (flag & SOR_EISENSTAT) {
    Vec               xx1;
    PetscBool         hasop;
    const PetscScalar *diag;
    PetscScalar       *sl,scale = (omega - 2.0)/omega;
    PetscInt          i,n;

    if (!mat->xx1) {
      ierr = VecDuplicate(bb,&mat->xx1);CHKERRQ(ierr);
      ierr = VecDuplicate(bb,&mat->bb1);CHKERRQ(ierr);
    }
    xx1 = mat->xx1;
    bb1 = mat->bb1;

    ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);

    if (!mat->diag) {
      /* this is wrong for same matrix with new nonzero values */
      ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr);
      ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
    }
    ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);

    if (hasop) {
      ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
      ierr = VecAYPX(mat->slvec1a,scale,bb);CHKERRQ(ierr);
    } else {
      /*
          These two lines are replaced by code that may be a bit faster for a good compiler
        ierr = VecPointwiseMult(mat->slvec1a,mat->diag,xx);CHKERRQ(ierr);
        ierr = VecAYPX(mat->slvec1a,scale,bb);CHKERRQ(ierr);
      */
      ierr = VecGetArray(mat->slvec1a,&sl);CHKERRQ(ierr);
      ierr = VecGetArrayRead(mat->diag,&diag);CHKERRQ(ierr);
      ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
      ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
      ierr = VecGetLocalSize(xx,&n);CHKERRQ(ierr);
      if (omega == 1.0) {
        for (i=0; i<n; i++) sl[i] = b[i] - diag[i]*x[i];
        ierr = PetscLogFlops(2.0*n);CHKERRQ(ierr);
      } else {
        for (i=0; i<n; i++) sl[i] = b[i] + scale*diag[i]*x[i];
        ierr = PetscLogFlops(3.0*n);CHKERRQ(ierr);
      }
      ierr = VecRestoreArray(mat->slvec1a,&sl);CHKERRQ(ierr);
      ierr = VecRestoreArrayRead(mat->diag,&diag);CHKERRQ(ierr);
      ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
      ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
    }

    /* multiply off-diagonal portion of matrix */
    ierr = VecSet(mat->slvec1b,0.0);CHKERRQ(ierr);
    ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr);
    ierr = VecGetArray(mat->slvec0,&from);CHKERRQ(ierr);
    ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
    ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
    ierr = VecRestoreArray(mat->slvec0,&from);CHKERRQ(ierr);
    ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
    ierr = VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,mat->slvec1a);CHKERRQ(ierr);

    /* local sweep */
    ierr = (*mat->A->ops->sor)(mat->A,mat->slvec1a,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
    ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
  } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
  PetscFunctionReturn(0);
}
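/*
   Usage sketch: MatSOR() is normally exercised through the PCSOR
   preconditioner rather than called directly (assumes bs = 1, per the
   check above; ksp and pc are placeholders and error handling is
   abbreviated):

     PC pc;
     ierr = KSPGetPC(ksp,&pc);CHKERRQ(ierr);
     ierr = PCSetType(pc,PCSOR);CHKERRQ(ierr);
     ierr = PCSORSetSymmetric(pc,SOR_LOCAL_SYMMETRIC_SWEEP);CHKERRQ(ierr);
*/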
#undef __FUNCT__
#define __FUNCT__ "MatSOR_MPISBAIJ_2comm"
PetscErrorCode MatSOR_MPISBAIJ_2comm(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
{
  Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
  PetscErrorCode ierr;
  Vec            lvec1,bb1;

  PetscFunctionBegin;
  if (its <= 0 || lits <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
  if (matin->rmap->bs > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");

  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
      its--;
    }

    ierr = VecDuplicate(mat->lvec,&lvec1);CHKERRQ(ierr);
    ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
    while (its--) {
      ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      /* lower diagonal part: bb1 = bb - B^T*xx */
      ierr = (*mat->B->ops->multtranspose)(mat->B,xx,lvec1);CHKERRQ(ierr);
      ierr = VecScale(lvec1,-1.0);CHKERRQ(ierr);

      ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecCopy(bb,bb1);CHKERRQ(ierr);
      ierr = VecScatterBegin(mat->Mvctx,lvec1,bb1,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);

      /* upper diagonal part: bb1 = bb1 - B*x */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);

      ierr = VecScatterEnd(mat->Mvctx,lvec1,bb1,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);

      /* diagonal sweep */
      ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
    }
    ierr = VecDestroy(&lvec1);CHKERRQ(ierr);
    ierr = VecDestroy(&bb1);CHKERRQ(ierr);
  } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatCreateMPISBAIJWithArrays"
/*@
   MatCreateMPISBAIJWithArrays - creates an MPI SBAIJ matrix using arrays that contain the local rows
   in standard CSR format.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  bs - the block size, only a block size of 1 is supported
.  m - number of local rows (Cannot be PETSC_DECIDE)
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
       calculated if N is given) For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.  i - row indices
.  j - column indices
-  a - matrix values

   Output Parameter:
.  mat - the matrix

   Level: intermediate

   Notes:
   The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
   thus you CANNOT change the matrix entries by changing the values of a[] after you have
   called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.

   The i and j indices are 0 based, and the i indices are offsets into the local j array.

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
          MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
@*/
PetscErrorCode MatCreateMPISBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
  if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
  ierr = MatCreate(comm,mat);CHKERRQ(ierr);
  ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
  ierr = MatSetType(*mat,MATMPISBAIJ);CHKERRQ(ierr);
  ierr = MatMPISBAIJSetPreallocationCSR(*mat,bs,i,j,a);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
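/*
   Usage sketch for MatCreateMPISBAIJWithArrays() with bs = 1 (the arrays
   are made-up sample data for two local rows, upper triangle only; error
   handling abbreviated):

     Mat         A;
     PetscInt    i[] = {0,2,3};
     PetscInt    j[] = {0,1,1};
     PetscScalar a[] = {2.0,-1.0,2.0};
     ierr = MatCreateMPISBAIJWithArrays(PETSC_COMM_WORLD,1,2,2,PETSC_DETERMINE,PETSC_DETERMINE,i,j,a,&A);CHKERRQ(ierr);
*/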
#undef __FUNCT__
#define __FUNCT__ "MatMPISBAIJSetPreallocationCSR"
/*@C
   MatMPISBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in SBAIJ format
   (block compressed row storage of the upper triangular part).

   Collective on MPI_Comm

   Input Parameters:
+  B - the matrix
.  bs - the block size
.  i - the indices into j for the start of each local row (starts with zero)
.  j - the column indices for each local row (starts with zero); these must be sorted for each row
-  v - optional values in the matrix

   Level: developer

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ
@*/
PetscErrorCode MatMPISBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscTryMethod(B,"MatMPISBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
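/*
   Sketch of the preallocation-only path: since v is optional, passing NULL
   preallocates the nonzero pattern without inserting values, which can then be
   set later with MatSetValues(). The variables B, mlocal, nlocal, ii, and jj are
   hypothetical (ii/jj shaped as in the CSR example above).

     ierr = MatCreate(PETSC_COMM_WORLD,&B);CHKERRQ(ierr);
     ierr = MatSetSizes(B,mlocal,nlocal,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
     ierr = MatSetType(B,MATMPISBAIJ);CHKERRQ(ierr);
     ierr = MatMPISBAIJSetPreallocationCSR(B,1,ii,jj,NULL);CHKERRQ(ierr);
*/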
#undef __FUNCT__
#define __FUNCT__ "MatCreateMPIMatConcatenateSeqMat_MPISBAIJ"
PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPISBAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
{
  PetscErrorCode ierr;
  PetscInt       m,N,i,rstart,nnz,Ii,bs,cbs;
  PetscInt       *indx;
  PetscScalar    *values;

  PetscFunctionBegin;
  ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
  if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
    Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)inmat->data;
    PetscInt     *dnz,*onz,sum,bs,cbs,mbs,Nbs;
    PetscInt     *bindx,rmax=a->rmax,j;

    ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
    mbs = m/bs; Nbs = N/cbs;
    if (n == PETSC_DECIDE) {
      ierr = PetscSplitOwnership(comm,&n,&Nbs);CHKERRQ(ierr);
    }
    /* Check sum(n) = Nbs */
    ierr = MPI_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
    if (sum != Nbs) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local block columns != global block columns %D",Nbs);

    /* prefix sum of the block-row counts gives this process's starting block row */
    ierr    = MPI_Scan(&mbs,&rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
    rstart -= mbs;

    ierr = PetscMalloc1(rmax,&bindx);CHKERRQ(ierr);
    ierr = MatPreallocateInitialize(comm,mbs,n,dnz,onz);CHKERRQ(ierr);
    ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);
    for (i=0; i<mbs; i++) {
      ierr = MatGetRow_SeqSBAIJ(inmat,i*bs,&nnz,&indx,NULL);CHKERRQ(ierr); /* non-blocked nnz and indx */
      nnz  = nnz/bs;
      for (j=0; j<nnz; j++) bindx[j] = indx[j*bs]/bs;
      ierr = MatPreallocateSet(i+rstart,nnz,bindx,dnz,onz);CHKERRQ(ierr);
      ierr = MatRestoreRow_SeqSBAIJ(inmat,i*bs,&nnz,&indx,NULL);CHKERRQ(ierr);
    }
    ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_FALSE);CHKERRQ(ierr);
    ierr = PetscFree(bindx);CHKERRQ(ierr);

    ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
    ierr = MatSetSizes(*outmat,m,n*bs,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
    ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
    ierr = MatSetType(*outmat,MATMPISBAIJ);CHKERRQ(ierr);
    ierr = MatMPISBAIJSetPreallocation(*outmat,bs,0,dnz,0,onz);CHKERRQ(ierr);
    ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
  }

  /* numeric phase */
  ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr);

  ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);
  for (i=0; i<m; i++) {
    ierr = MatGetRow_SeqSBAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
    Ii   = i + rstart;
    ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
    ierr = MatRestoreRow_SeqSBAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
  }
  ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_FALSE);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
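/*
   Note on usage (sketch): the routine above is the MPISBAIJ back end reached
   through the public MatCreateMPIMatConcatenateSeqMat(), which stacks each rank's
   sequential matrix into one parallel matrix. Assuming a hypothetical SeqSBAIJ
   matrix seqA on every rank:

     Mat mpiA;
     ierr = MatCreateMPIMatConcatenateSeqMat(PETSC_COMM_WORLD,seqA,PETSC_DECIDE,MAT_INITIAL_MATRIX,&mpiA);CHKERRQ(ierr);
*/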