1 2 #include <../src/mat/impls/baij/mpi/mpibaij.h> /*I "petscmat.h" I*/ 3 #include <../src/mat/impls/sbaij/mpi/mpisbaij.h> 4 #include <../src/mat/impls/sbaij/seq/sbaij.h> 5 #include <petscblaslapack.h> 6 7 #if defined(PETSC_HAVE_ELEMENTAL) 8 PETSC_INTERN PetscErrorCode MatConvert_MPISBAIJ_Elemental(Mat,MatType,MatReuse,Mat*); 9 #endif 10 #undef __FUNCT__ 11 #define __FUNCT__ "MatStoreValues_MPISBAIJ" 12 PetscErrorCode MatStoreValues_MPISBAIJ(Mat mat) 13 { 14 Mat_MPISBAIJ *aij = (Mat_MPISBAIJ*)mat->data; 15 PetscErrorCode ierr; 16 17 PetscFunctionBegin; 18 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 19 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 20 PetscFunctionReturn(0); 21 } 22 23 #undef __FUNCT__ 24 #define __FUNCT__ "MatRetrieveValues_MPISBAIJ" 25 PetscErrorCode MatRetrieveValues_MPISBAIJ(Mat mat) 26 { 27 Mat_MPISBAIJ *aij = (Mat_MPISBAIJ*)mat->data; 28 PetscErrorCode ierr; 29 30 PetscFunctionBegin; 31 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 32 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 33 PetscFunctionReturn(0); 34 } 35 36 #define MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv,orow,ocol) \ 37 { \ 38 \ 39 brow = row/bs; \ 40 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 41 rmax = aimax[brow]; nrow = ailen[brow]; \ 42 bcol = col/bs; \ 43 ridx = row % bs; cidx = col % bs; \ 44 low = 0; high = nrow; \ 45 while (high-low > 3) { \ 46 t = (low+high)/2; \ 47 if (rp[t] > bcol) high = t; \ 48 else low = t; \ 49 } \ 50 for (_i=low; _i<high; _i++) { \ 51 if (rp[_i] > bcol) break; \ 52 if (rp[_i] == bcol) { \ 53 bap = ap + bs2*_i + bs*cidx + ridx; \ 54 if (addv == ADD_VALUES) *bap += value; \ 55 else *bap = value; \ 56 goto a_noinsert; \ 57 } \ 58 } \ 59 if (a->nonew == 1) goto a_noinsert; \ 60 if (a->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \ 61 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \ 62 N = nrow++ - 1; \ 63 /* shift up all the later entries in this row */ \ 64 for (ii=N; ii>=_i; ii--) { \ 65 rp[ii+1] = rp[ii]; \ 66 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 67 } \ 68 if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); } \ 69 rp[_i] = bcol; \ 70 ap[bs2*_i + bs*cidx + ridx] = value; \ 71 A->nonzerostate++;\ 72 a_noinsert:; \ 73 ailen[brow] = nrow; \ 74 } 75 76 #define MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv,orow,ocol) \ 77 { \ 78 brow = row/bs; \ 79 rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 80 rmax = bimax[brow]; nrow = bilen[brow]; \ 81 bcol = col/bs; \ 82 ridx = row % bs; cidx = col % bs; \ 83 low = 0; high = nrow; \ 84 while (high-low > 3) { \ 85 t = (low+high)/2; \ 86 if (rp[t] > bcol) high = t; \ 87 else low = t; \ 88 } \ 89 for (_i=low; _i<high; _i++) { \ 90 if (rp[_i] > bcol) break; \ 91 if (rp[_i] == bcol) { \ 92 bap = ap + bs2*_i + bs*cidx + ridx; \ 93 if (addv == ADD_VALUES) *bap += value; \ 94 else *bap = value; \ 95 goto b_noinsert; \ 96 } \ 97 } \ 98 if (b->nonew == 1) goto b_noinsert; \ 99 if (b->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \ 100 MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \ 101 N = nrow++ - 1; \ 102 /* shift up all the later entries in this row */ \ 103 for (ii=N; ii>=_i; ii--) { \ 104 rp[ii+1] = rp[ii]; \ 105 ierr = 
PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
    } \
    if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);} \
    rp[_i]                      = bcol; \
    ap[bs2*_i + bs*cidx + ridx] = value; \
    B->nonzerostate++;\
b_noinsert:; \
    bilen[brow] = nrow; \
  }

/* Only add/insert a(i,j) with i<=j (blocks).
   Any a(i,j) with i>j input by user is ignored.
*/
#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPISBAIJ"
PetscErrorCode MatSetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  MatScalar      value;
  PetscBool      roworiented = baij->roworiented;
  PetscErrorCode ierr;
  PetscInt       i,j,row,col;
  PetscInt       rstart_orig=mat->rmap->rstart;
  PetscInt       rend_orig  =mat->rmap->rend,cstart_orig=mat->cmap->rstart;
  PetscInt       cend_orig  =mat->cmap->rend,bs=mat->rmap->bs;

  /* Some Variables required in the macro */
  Mat          A     = baij->A;
  Mat_SeqSBAIJ *a    = (Mat_SeqSBAIJ*)(A)->data;
  PetscInt     *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
  MatScalar    *aa   =a->a;

  Mat         B     = baij->B;
  Mat_SeqBAIJ *b    = (Mat_SeqBAIJ*)(B)->data;
  PetscInt    *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
  MatScalar   *ba   =b->a;

  PetscInt  *rp,ii,nrow,_i,rmax,N,brow,bcol;
  PetscInt  low,high,t,ridx,cidx,bs2=a->bs2;
  MatScalar *ap,*bap;

  /* for stash */
  PetscInt  n_loc, *in_loc = NULL;
  MatScalar *v_loc = NULL;

  PetscFunctionBegin;
  if (!baij->donotstash) {
    if (n > baij->n_loc) {
      ierr = PetscFree(baij->in_loc);CHKERRQ(ierr);
      ierr = PetscFree(baij->v_loc);CHKERRQ(ierr);
      ierr = PetscMalloc1(n,&baij->in_loc);CHKERRQ(ierr);
      ierr = PetscMalloc1(n,&baij->v_loc);CHKERRQ(ierr);

      baij->n_loc = n;
    }
    in_loc = baij->in_loc;
    v_loc  = baij->v_loc;
  }

  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
    if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */
      row = im[i] - rstart_orig;                     /* local row index */
      for (j=0; j<n; j++) {
        if (im[i]/bs > in[j]/bs) {
          if (a->ignore_ltriangular) {
            continue; /* ignore lower triangular blocks */
          } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. 
Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)"); 176 } 177 if (in[j] >= cstart_orig && in[j] < cend_orig) { /* diag entry (A) */ 178 col = in[j] - cstart_orig; /* local col index */ 179 brow = row/bs; bcol = col/bs; 180 if (brow > bcol) continue; /* ignore lower triangular blocks of A */ 181 if (roworiented) value = v[i*n+j]; 182 else value = v[i+j*m]; 183 MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv,im[i],in[j]); 184 /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 185 } else if (in[j] < 0) continue; 186 #if defined(PETSC_USE_DEBUG) 187 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 188 #endif 189 else { /* off-diag entry (B) */ 190 if (mat->was_assembled) { 191 if (!baij->colmap) { 192 ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 193 } 194 #if defined(PETSC_USE_CTABLE) 195 ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr); 196 col = col - 1; 197 #else 198 col = baij->colmap[in[j]/bs] - 1; 199 #endif 200 if (col < 0 && !((Mat_SeqSBAIJ*)(baij->A->data))->nonew) { 201 ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr); 202 col = in[j]; 203 /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */ 204 B = baij->B; 205 b = (Mat_SeqBAIJ*)(B)->data; 206 bimax= b->imax;bi=b->i;bilen=b->ilen;bj=b->j; 207 ba = b->a; 208 } else col += in[j]%bs; 209 } else col = in[j]; 210 if (roworiented) value = v[i*n+j]; 211 else value = v[i+j*m]; 212 MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv,im[i],in[j]); 213 /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 214 } 215 } 216 } else { /* off processor entry */ 217 if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 218 if (!baij->donotstash) { 219 mat->assembled = PETSC_FALSE; 220 n_loc = 0; 221 for (j=0; j<n; j++) { 222 if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */ 223 in_loc[n_loc] = in[j]; 224 if (roworiented) { 225 v_loc[n_loc] = v[i*n+j]; 226 } else { 227 v_loc[n_loc] = v[j*m+i]; 228 } 229 n_loc++; 230 } 231 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n_loc,in_loc,v_loc,PETSC_FALSE);CHKERRQ(ierr); 232 } 233 } 234 } 235 PetscFunctionReturn(0); 236 } 237 238 #undef __FUNCT__ 239 #define __FUNCT__ "MatSetValuesBlocked_SeqSBAIJ_Inlined" 240 PETSC_STATIC_INLINE PetscErrorCode MatSetValuesBlocked_SeqSBAIJ_Inlined(Mat A,PetscInt row,PetscInt col,const PetscScalar v[],InsertMode is,PetscInt orow,PetscInt ocol) 241 { 242 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 243 PetscErrorCode ierr; 244 PetscInt *rp,low,high,t,ii,jj,nrow,i,rmax,N; 245 PetscInt *imax =a->imax,*ai=a->i,*ailen=a->ilen; 246 PetscInt *aj =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs; 247 PetscBool roworiented=a->roworiented; 248 const PetscScalar *value = v; 249 MatScalar *ap,*aa = a->a,*bap; 250 251 PetscFunctionBegin; 252 if (col < row) { 253 if (a->ignore_ltriangular) PetscFunctionReturn(0); /* ignore lower triangular block */ 254 else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. 
Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)"); 255 } 256 rp = aj + ai[row]; 257 ap = aa + bs2*ai[row]; 258 rmax = imax[row]; 259 nrow = ailen[row]; 260 value = v; 261 low = 0; 262 high = nrow; 263 264 while (high-low > 7) { 265 t = (low+high)/2; 266 if (rp[t] > col) high = t; 267 else low = t; 268 } 269 for (i=low; i<high; i++) { 270 if (rp[i] > col) break; 271 if (rp[i] == col) { 272 bap = ap + bs2*i; 273 if (roworiented) { 274 if (is == ADD_VALUES) { 275 for (ii=0; ii<bs; ii++) { 276 for (jj=ii; jj<bs2; jj+=bs) { 277 bap[jj] += *value++; 278 } 279 } 280 } else { 281 for (ii=0; ii<bs; ii++) { 282 for (jj=ii; jj<bs2; jj+=bs) { 283 bap[jj] = *value++; 284 } 285 } 286 } 287 } else { 288 if (is == ADD_VALUES) { 289 for (ii=0; ii<bs; ii++) { 290 for (jj=0; jj<bs; jj++) { 291 *bap++ += *value++; 292 } 293 } 294 } else { 295 for (ii=0; ii<bs; ii++) { 296 for (jj=0; jj<bs; jj++) { 297 *bap++ = *value++; 298 } 299 } 300 } 301 } 302 goto noinsert2; 303 } 304 } 305 if (nonew == 1) goto noinsert2; 306 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new block index nonzero block (%D, %D) in the matrix", orow, ocol); 307 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 308 N = nrow++ - 1; high++; 309 /* shift up all the later entries in this row */ 310 for (ii=N; ii>=i; ii--) { 311 rp[ii+1] = rp[ii]; 312 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 313 } 314 if (N >= i) { 315 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 316 } 317 rp[i] = col; 318 bap = ap + bs2*i; 319 if (roworiented) { 320 for (ii=0; ii<bs; ii++) { 321 for (jj=ii; jj<bs2; jj+=bs) { 322 bap[jj] = *value++; 323 } 324 } 325 } else { 326 for (ii=0; ii<bs; ii++) { 327 for (jj=0; jj<bs; jj++) { 328 *bap++ = *value++; 329 } 330 } 331 } 332 noinsert2:; 333 ailen[row] = nrow; 334 PetscFunctionReturn(0); 335 } 336 337 #undef __FUNCT__ 338 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ_Inlined" 339 /* 340 This routine is exactly duplicated in mpibaij.c 341 */ 342 PETSC_STATIC_INLINE PetscErrorCode MatSetValuesBlocked_SeqBAIJ_Inlined(Mat A,PetscInt row,PetscInt col,const PetscScalar v[],InsertMode is,PetscInt orow,PetscInt ocol) 343 { 344 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 345 PetscInt *rp,low,high,t,ii,jj,nrow,i,rmax,N; 346 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 347 PetscErrorCode ierr; 348 PetscInt *aj =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs; 349 PetscBool roworiented=a->roworiented; 350 const PetscScalar *value = v; 351 MatScalar *ap,*aa = a->a,*bap; 352 353 PetscFunctionBegin; 354 rp = aj + ai[row]; 355 ap = aa + bs2*ai[row]; 356 rmax = imax[row]; 357 nrow = ailen[row]; 358 low = 0; 359 high = nrow; 360 value = v; 361 while (high-low > 7) { 362 t = (low+high)/2; 363 if (rp[t] > col) high = t; 364 else low = t; 365 } 366 for (i=low; i<high; i++) { 367 if (rp[i] > col) break; 368 if (rp[i] == col) { 369 bap = ap + bs2*i; 370 if (roworiented) { 371 if (is == ADD_VALUES) { 372 for (ii=0; ii<bs; ii++) { 373 for (jj=ii; jj<bs2; jj+=bs) { 374 bap[jj] += *value++; 375 } 376 } 377 } else { 378 for (ii=0; ii<bs; ii++) { 379 for (jj=ii; jj<bs2; jj+=bs) { 380 bap[jj] = *value++; 381 } 382 } 383 } 384 } else { 385 if (is == ADD_VALUES) { 386 for (ii=0; ii<bs; ii++,value+=bs) { 387 for (jj=0; jj<bs; jj++) { 388 bap[jj] += value[jj]; 389 } 390 bap += bs; 391 } 392 } else { 393 for (ii=0; ii<bs; ii++,value+=bs) { 394 for 
(jj=0; jj<bs; jj++) { 395 bap[jj] = value[jj]; 396 } 397 bap += bs; 398 } 399 } 400 } 401 goto noinsert2; 402 } 403 } 404 if (nonew == 1) goto noinsert2; 405 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new global block indexed nonzero block (%D, %D) in the matrix", orow, ocol); 406 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 407 N = nrow++ - 1; high++; 408 /* shift up all the later entries in this row */ 409 for (ii=N; ii>=i; ii--) { 410 rp[ii+1] = rp[ii]; 411 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 412 } 413 if (N >= i) { 414 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 415 } 416 rp[i] = col; 417 bap = ap + bs2*i; 418 if (roworiented) { 419 for (ii=0; ii<bs; ii++) { 420 for (jj=ii; jj<bs2; jj+=bs) { 421 bap[jj] = *value++; 422 } 423 } 424 } else { 425 for (ii=0; ii<bs; ii++) { 426 for (jj=0; jj<bs; jj++) { 427 *bap++ = *value++; 428 } 429 } 430 } 431 noinsert2:; 432 ailen[row] = nrow; 433 PetscFunctionReturn(0); 434 } 435 436 #undef __FUNCT__ 437 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ" 438 /* 439 This routine could be optimized by removing the need for the block copy below and passing stride information 440 to the above inline routines; similarly in MatSetValuesBlocked_MPIBAIJ() 441 */ 442 PetscErrorCode MatSetValuesBlocked_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv) 443 { 444 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 445 const MatScalar *value; 446 MatScalar *barray =baij->barray; 447 PetscBool roworiented = baij->roworiented,ignore_ltriangular = ((Mat_SeqSBAIJ*)baij->A->data)->ignore_ltriangular; 448 PetscErrorCode ierr; 449 PetscInt i,j,ii,jj,row,col,rstart=baij->rstartbs; 450 PetscInt rend=baij->rendbs,cstart=baij->rstartbs,stepval; 451 PetscInt cend=baij->rendbs,bs=mat->rmap->bs,bs2=baij->bs2; 452 453 PetscFunctionBegin; 454 if (!barray) { 455 ierr = PetscMalloc1(bs2,&barray);CHKERRQ(ierr); 456 baij->barray = barray; 457 } 458 459 if (roworiented) { 460 stepval = (n-1)*bs; 461 } else { 462 stepval = (m-1)*bs; 463 } 464 for (i=0; i<m; i++) { 465 if (im[i] < 0) continue; 466 #if defined(PETSC_USE_DEBUG) 467 if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block indexed row too large %D max %D",im[i],baij->Mbs-1); 468 #endif 469 if (im[i] >= rstart && im[i] < rend) { 470 row = im[i] - rstart; 471 for (j=0; j<n; j++) { 472 if (im[i] > in[j]) { 473 if (ignore_ltriangular) continue; /* ignore lower triangular blocks */ 474 else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. 
Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)"); 475 } 476 /* If NumCol = 1 then a copy is not required */ 477 if ((roworiented) && (n == 1)) { 478 barray = (MatScalar*) v + i*bs2; 479 } else if ((!roworiented) && (m == 1)) { 480 barray = (MatScalar*) v + j*bs2; 481 } else { /* Here a copy is required */ 482 if (roworiented) { 483 value = v + i*(stepval+bs)*bs + j*bs; 484 } else { 485 value = v + j*(stepval+bs)*bs + i*bs; 486 } 487 for (ii=0; ii<bs; ii++,value+=stepval) { 488 for (jj=0; jj<bs; jj++) { 489 *barray++ = *value++; 490 } 491 } 492 barray -=bs2; 493 } 494 495 if (in[j] >= cstart && in[j] < cend) { 496 col = in[j] - cstart; 497 ierr = MatSetValuesBlocked_SeqSBAIJ_Inlined(baij->A,row,col,barray,addv,im[i],in[j]);CHKERRQ(ierr); 498 } else if (in[j] < 0) continue; 499 #if defined(PETSC_USE_DEBUG) 500 else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block indexed column too large %D max %D",in[j],baij->Nbs-1); 501 #endif 502 else { 503 if (mat->was_assembled) { 504 if (!baij->colmap) { 505 ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 506 } 507 508 #if defined(PETSC_USE_DEBUG) 509 #if defined(PETSC_USE_CTABLE) 510 { PetscInt data; 511 ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr); 512 if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 513 } 514 #else 515 if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 516 #endif 517 #endif 518 #if defined(PETSC_USE_CTABLE) 519 ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr); 520 col = (col - 1)/bs; 521 #else 522 col = (baij->colmap[in[j]] - 1)/bs; 523 #endif 524 if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 525 ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr); 526 col = in[j]; 527 } 528 } else col = in[j]; 529 ierr = MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B,row,col,barray,addv,im[i],in[j]);CHKERRQ(ierr); 530 } 531 } 532 } else { 533 if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process block indexed row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 534 if (!baij->donotstash) { 535 if (roworiented) { 536 ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 537 } else { 538 ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 539 } 540 } 541 } 542 } 543 PetscFunctionReturn(0); 544 } 545 546 #undef __FUNCT__ 547 #define __FUNCT__ "MatGetValues_MPISBAIJ" 548 PetscErrorCode MatGetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 549 { 550 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 551 PetscErrorCode ierr; 552 PetscInt bs = mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend; 553 PetscInt bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data; 554 555 PetscFunctionBegin; 556 for (i=0; i<m; i++) { 557 if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); */ 558 if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1); 559 if (idxm[i] >= bsrstart && idxm[i] < bsrend) { 560 row = idxm[i] - bsrstart; 561 for (j=0; j<n; j++) { 562 if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column %D",idxn[j]); */ 563 if 
(idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1); 564 if (idxn[j] >= bscstart && idxn[j] < bscend) { 565 col = idxn[j] - bscstart; 566 ierr = MatGetValues_SeqSBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 567 } else { 568 if (!baij->colmap) { 569 ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 570 } 571 #if defined(PETSC_USE_CTABLE) 572 ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr); 573 data--; 574 #else 575 data = baij->colmap[idxn[j]/bs]-1; 576 #endif 577 if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0; 578 else { 579 col = data + idxn[j]%bs; 580 ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 581 } 582 } 583 } 584 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 585 } 586 PetscFunctionReturn(0); 587 } 588 589 #undef __FUNCT__ 590 #define __FUNCT__ "MatNorm_MPISBAIJ" 591 PetscErrorCode MatNorm_MPISBAIJ(Mat mat,NormType type,PetscReal *norm) 592 { 593 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 594 PetscErrorCode ierr; 595 PetscReal sum[2],*lnorm2; 596 597 PetscFunctionBegin; 598 if (baij->size == 1) { 599 ierr = MatNorm(baij->A,type,norm);CHKERRQ(ierr); 600 } else { 601 if (type == NORM_FROBENIUS) { 602 ierr = PetscMalloc1(2,&lnorm2);CHKERRQ(ierr); 603 ierr = MatNorm(baij->A,type,lnorm2);CHKERRQ(ierr); 604 *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2++; /* squar power of norm(A) */ 605 ierr = MatNorm(baij->B,type,lnorm2);CHKERRQ(ierr); 606 *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2--; /* squar power of norm(B) */ 607 ierr = MPIU_Allreduce(lnorm2,sum,2,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 608 *norm = PetscSqrtReal(sum[0] + 2*sum[1]); 609 ierr = PetscFree(lnorm2);CHKERRQ(ierr); 610 } else if (type == NORM_INFINITY || type == NORM_1) { /* max row/column sum */ 611 Mat_SeqSBAIJ *amat=(Mat_SeqSBAIJ*)baij->A->data; 612 Mat_SeqBAIJ *bmat=(Mat_SeqBAIJ*)baij->B->data; 613 PetscReal *rsum,*rsum2,vabs; 614 PetscInt *jj,*garray=baij->garray,rstart=baij->rstartbs,nz; 615 PetscInt brow,bcol,col,bs=baij->A->rmap->bs,row,grow,gcol,mbs=amat->mbs; 616 MatScalar *v; 617 618 ierr = PetscMalloc2(mat->cmap->N,&rsum,mat->cmap->N,&rsum2);CHKERRQ(ierr); 619 ierr = PetscMemzero(rsum,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr); 620 /* Amat */ 621 v = amat->a; jj = amat->j; 622 for (brow=0; brow<mbs; brow++) { 623 grow = bs*(rstart + brow); 624 nz = amat->i[brow+1] - amat->i[brow]; 625 for (bcol=0; bcol<nz; bcol++) { 626 gcol = bs*(rstart + *jj); jj++; 627 for (col=0; col<bs; col++) { 628 for (row=0; row<bs; row++) { 629 vabs = PetscAbsScalar(*v); v++; 630 rsum[gcol+col] += vabs; 631 /* non-diagonal block */ 632 if (bcol > 0 && vabs > 0.0) rsum[grow+row] += vabs; 633 } 634 } 635 } 636 ierr = PetscLogFlops(nz*bs*bs);CHKERRQ(ierr); 637 } 638 /* Bmat */ 639 v = bmat->a; jj = bmat->j; 640 for (brow=0; brow<mbs; brow++) { 641 grow = bs*(rstart + brow); 642 nz = bmat->i[brow+1] - bmat->i[brow]; 643 for (bcol=0; bcol<nz; bcol++) { 644 gcol = bs*garray[*jj]; jj++; 645 for (col=0; col<bs; col++) { 646 for (row=0; row<bs; row++) { 647 vabs = PetscAbsScalar(*v); v++; 648 rsum[gcol+col] += vabs; 649 rsum[grow+row] += vabs; 650 } 651 } 652 } 653 ierr = PetscLogFlops(nz*bs*bs);CHKERRQ(ierr); 654 } 655 ierr = MPIU_Allreduce(rsum,rsum2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 656 *norm = 0.0; 657 for (col=0; col<mat->cmap->N; col++) { 658 if (rsum2[col] > 
*norm) *norm = rsum2[col];
      }
      ierr = PetscFree2(rsum,rsum2);CHKERRQ(ierr);
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for this norm yet");
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPISBAIJ"
PetscErrorCode MatAssemblyBegin_MPISBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;

  PetscFunctionBegin;
  if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);

  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
  ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(mat,"Stash has %D entries,uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

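/*
   Summary of the assembly-end flow implemented below (this comment only documents the existing code):
     1) drain the scalar stash, handing each batch of received off-process entries to MatSetValues_MPISBAIJ();
     2) drain the block stash; those values were stashed column-oriented, so the roworiented flags of the
        parallel matrix and of its local A and B blocks are temporarily set to PETSC_FALSE and restored afterwards;
     3) assemble the local A and B blocks; if any process had to disassemble its B block, all processes
        disassemble so that the matrix can be reassembled consistently.
*/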
#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPISBAIJ"
PetscErrorCode MatAssemblyEnd_MPISBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPISBAIJ   *baij=(Mat_MPISBAIJ*)mat->data;
  Mat_SeqSBAIJ   *a   =(Mat_SeqSBAIJ*)baij->A->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
  PetscInt       *row,*col;
  PetscBool      other_disassembled;
  PetscMPIInt    n;
  PetscBool      r1,r2,r3;
  MatScalar      *val;

  /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
  PetscFunctionBegin;
  if (!baij->donotstash && !mat->nooffprocentries) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);
        i    = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
    /* Now process the block-stash. Since the values are stashed column-oriented,
       set the roworiented flag to column oriented, and after MatSetValues()
       restore the original flags */
    r1 = baij->roworiented;
    r2 = a->roworiented;
    r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented;

    baij->roworiented = PETSC_FALSE;
    a->roworiented    = PETSC_FALSE;

    ((Mat_SeqBAIJ*)baij->B->data)->roworiented = PETSC_FALSE; /* b->roworiented */
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        ierr = MatSetValuesBlocked_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,mat->insertmode);CHKERRQ(ierr);
        i    = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);

    baij->roworiented = r1;
    a->roworiented    = r2;

    ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworiented */
  }

  ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) {
    ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
    }
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPISBAIJ(mat);CHKERRQ(ierr); /* setup Mvctx and sMvctx */
  }
  ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);

  ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);

  baij->rowvalues = 0;

  /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
  if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
    PetscObjectState state = baij->A->nonzerostate + baij->B->nonzerostate;
    ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

extern PetscErrorCode MatView_SeqSBAIJ(Mat,PetscViewer);
extern PetscErrorCode MatSetValues_MPIBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
#include <petscdraw.h>
#undef __FUNCT__
#define __FUNCT__ "MatView_MPISBAIJ_ASCIIorDraworSocket"
static PetscErrorCode MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPISBAIJ      *baij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode    ierr;
  PetscInt          bs   = mat->rmap->bs;
  PetscMPIInt       rank = baij->rank;
  PetscBool         iascii,isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  if (iascii) {
    ierr =
PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 808 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 809 MatInfo info; 810 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 811 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 812 ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr); 813 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,mat->rmap->bs,(PetscInt)info.memory);CHKERRQ(ierr); 814 ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 815 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 816 ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 817 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 818 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 819 ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr); 820 ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 821 ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr); 822 PetscFunctionReturn(0); 823 } else if (format == PETSC_VIEWER_ASCII_INFO) { 824 ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr); 825 PetscFunctionReturn(0); 826 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 827 PetscFunctionReturn(0); 828 } 829 } 830 831 if (isdraw) { 832 PetscDraw draw; 833 PetscBool isnull; 834 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 835 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); 836 if (isnull) PetscFunctionReturn(0); 837 } 838 839 { 840 /* assemble the entire matrix onto first processor. */ 841 Mat A; 842 Mat_SeqSBAIJ *Aloc; 843 Mat_SeqBAIJ *Bloc; 844 PetscInt M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs; 845 MatScalar *a; 846 const char *matname; 847 848 /* Should this be the same type as mat? 
*/ 849 ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr); 850 if (!rank) { 851 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 852 } else { 853 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 854 } 855 ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr); 856 ierr = MatMPISBAIJSetPreallocation(A,mat->rmap->bs,0,NULL,0,NULL);CHKERRQ(ierr); 857 ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 858 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr); 859 860 /* copy over the A part */ 861 Aloc = (Mat_SeqSBAIJ*)baij->A->data; 862 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 863 ierr = PetscMalloc1(bs,&rvals);CHKERRQ(ierr); 864 865 for (i=0; i<mbs; i++) { 866 rvals[0] = bs*(baij->rstartbs + i); 867 for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1; 868 for (j=ai[i]; j<ai[i+1]; j++) { 869 col = (baij->cstartbs+aj[j])*bs; 870 for (k=0; k<bs; k++) { 871 ierr = MatSetValues_MPISBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 872 col++; 873 a += bs; 874 } 875 } 876 } 877 /* copy over the B part */ 878 Bloc = (Mat_SeqBAIJ*)baij->B->data; 879 ai = Bloc->i; aj = Bloc->j; a = Bloc->a; 880 for (i=0; i<mbs; i++) { 881 882 rvals[0] = bs*(baij->rstartbs + i); 883 for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1; 884 for (j=ai[i]; j<ai[i+1]; j++) { 885 col = baij->garray[aj[j]]*bs; 886 for (k=0; k<bs; k++) { 887 ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 888 col++; 889 a += bs; 890 } 891 } 892 } 893 ierr = PetscFree(rvals);CHKERRQ(ierr); 894 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 895 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 896 /* 897 Everyone has to call to draw the matrix since the graphics waits are 898 synchronized across all processors that share the PetscDraw object 899 */ 900 ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr); 901 ierr = PetscObjectGetName((PetscObject)mat,&matname);CHKERRQ(ierr); 902 if (!rank) { 903 ierr = PetscObjectSetName((PetscObject)((Mat_MPISBAIJ*)(A->data))->A,matname);CHKERRQ(ierr); 904 ierr = MatView_SeqSBAIJ(((Mat_MPISBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 905 } 906 ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr); 907 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 908 ierr = MatDestroy(&A);CHKERRQ(ierr); 909 } 910 PetscFunctionReturn(0); 911 } 912 913 #undef __FUNCT__ 914 #define __FUNCT__ "MatView_MPISBAIJ_Binary" 915 static PetscErrorCode MatView_MPISBAIJ_Binary(Mat mat,PetscViewer viewer) 916 { 917 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)mat->data; 918 Mat_SeqSBAIJ *A = (Mat_SeqSBAIJ*)a->A->data; 919 Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)a->B->data; 920 PetscErrorCode ierr; 921 PetscInt i,*row_lens,*crow_lens,bs = mat->rmap->bs,j,k,bs2=a->bs2,header[4],nz,rlen; 922 PetscInt *range=0,nzmax,*column_indices,cnt,col,*garray = a->garray,cstart = mat->cmap->rstart/bs,len,pcnt,l,ll; 923 int fd; 924 PetscScalar *column_values; 925 FILE *file; 926 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 927 PetscInt message_count,flowcontrolcount; 928 929 PetscFunctionBegin; 930 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 931 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr); 932 nz = bs2*(A->nz + B->nz); 933 rlen = mat->rmap->n; 934 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 935 if (!rank) { 936 header[0] = MAT_FILE_CLASSID; 937 header[1] = mat->rmap->N; 938 header[2] = mat->cmap->N; 939 940 ierr = 
MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 941 ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 942 /* get largest number of rows any processor has */ 943 range = mat->rmap->range; 944 for (i=1; i<size; i++) { 945 rlen = PetscMax(rlen,range[i+1] - range[i]); 946 } 947 } else { 948 ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 949 } 950 951 ierr = PetscMalloc1(rlen/bs,&crow_lens);CHKERRQ(ierr); 952 /* compute lengths of each row */ 953 for (i=0; i<a->mbs; i++) { 954 crow_lens[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i]; 955 } 956 /* store the row lengths to the file */ 957 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 958 if (!rank) { 959 MPI_Status status; 960 ierr = PetscMalloc1(rlen,&row_lens);CHKERRQ(ierr); 961 rlen = (range[1] - range[0])/bs; 962 for (i=0; i<rlen; i++) { 963 for (j=0; j<bs; j++) { 964 row_lens[i*bs+j] = bs*crow_lens[i]; 965 } 966 } 967 ierr = PetscBinaryWrite(fd,row_lens,bs*rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 968 for (i=1; i<size; i++) { 969 rlen = (range[i+1] - range[i])/bs; 970 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 971 ierr = MPI_Recv(crow_lens,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 972 for (k=0; k<rlen; k++) { 973 for (j=0; j<bs; j++) { 974 row_lens[k*bs+j] = bs*crow_lens[k]; 975 } 976 } 977 ierr = PetscBinaryWrite(fd,row_lens,bs*rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 978 } 979 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 980 ierr = PetscFree(row_lens);CHKERRQ(ierr); 981 } else { 982 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 983 ierr = MPI_Send(crow_lens,mat->rmap->n/bs,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 984 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 985 } 986 ierr = PetscFree(crow_lens);CHKERRQ(ierr); 987 988 /* load up the local column indices. Include for all rows not just one for each block row since process 0 does not have the 989 information needed to make it for each row from a block row. 
This does require more communication but still not more than 990 the communication needed for the nonzero values */ 991 nzmax = nz; /* space a largest processor needs */ 992 ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 993 ierr = PetscMalloc1(nzmax,&column_indices);CHKERRQ(ierr); 994 cnt = 0; 995 for (i=0; i<a->mbs; i++) { 996 pcnt = cnt; 997 for (j=B->i[i]; j<B->i[i+1]; j++) { 998 if ((col = garray[B->j[j]]) > cstart) break; 999 for (l=0; l<bs; l++) { 1000 column_indices[cnt++] = bs*col+l; 1001 } 1002 } 1003 for (k=A->i[i]; k<A->i[i+1]; k++) { 1004 for (l=0; l<bs; l++) { 1005 column_indices[cnt++] = bs*(A->j[k] + cstart)+l; 1006 } 1007 } 1008 for (; j<B->i[i+1]; j++) { 1009 for (l=0; l<bs; l++) { 1010 column_indices[cnt++] = bs*garray[B->j[j]]+l; 1011 } 1012 } 1013 len = cnt - pcnt; 1014 for (k=1; k<bs; k++) { 1015 ierr = PetscMemcpy(&column_indices[cnt],&column_indices[pcnt],len*sizeof(PetscInt));CHKERRQ(ierr); 1016 cnt += len; 1017 } 1018 } 1019 if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz); 1020 1021 /* store the columns to the file */ 1022 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1023 if (!rank) { 1024 MPI_Status status; 1025 ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1026 for (i=1; i<size; i++) { 1027 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1028 ierr = MPI_Recv(&cnt,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1029 ierr = MPI_Recv(column_indices,cnt,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1030 ierr = PetscBinaryWrite(fd,column_indices,cnt,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1031 } 1032 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1033 } else { 1034 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1035 ierr = MPI_Send(&cnt,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1036 ierr = MPI_Send(column_indices,cnt,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1037 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1038 } 1039 ierr = PetscFree(column_indices);CHKERRQ(ierr); 1040 1041 /* load up the numerical values */ 1042 ierr = PetscMalloc1(nzmax,&column_values);CHKERRQ(ierr); 1043 cnt = 0; 1044 for (i=0; i<a->mbs; i++) { 1045 rlen = bs*(B->i[i+1] - B->i[i] + A->i[i+1] - A->i[i]); 1046 for (j=B->i[i]; j<B->i[i+1]; j++) { 1047 if (garray[B->j[j]] > cstart) break; 1048 for (l=0; l<bs; l++) { 1049 for (ll=0; ll<bs; ll++) { 1050 column_values[cnt + l*rlen + ll] = B->a[bs2*j+l+bs*ll]; 1051 } 1052 } 1053 cnt += bs; 1054 } 1055 for (k=A->i[i]; k<A->i[i+1]; k++) { 1056 for (l=0; l<bs; l++) { 1057 for (ll=0; ll<bs; ll++) { 1058 column_values[cnt + l*rlen + ll] = A->a[bs2*k+l+bs*ll]; 1059 } 1060 } 1061 cnt += bs; 1062 } 1063 for (; j<B->i[i+1]; j++) { 1064 for (l=0; l<bs; l++) { 1065 for (ll=0; ll<bs; ll++) { 1066 column_values[cnt + l*rlen + ll] = B->a[bs2*j+l+bs*ll]; 1067 } 1068 } 1069 cnt += bs; 1070 } 1071 cnt += (bs-1)*rlen; 1072 } 1073 if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz); 1074 1075 /* store the column values to the file */ 1076 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1077 if (!rank) { 1078 MPI_Status status; 1079 ierr = 
PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1080 for (i=1; i<size; i++) { 1081 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1082 ierr = MPI_Recv(&cnt,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1083 ierr = MPI_Recv(column_values,cnt,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1084 ierr = PetscBinaryWrite(fd,column_values,cnt,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1085 } 1086 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1087 } else { 1088 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1089 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1090 ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1091 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1092 } 1093 ierr = PetscFree(column_values);CHKERRQ(ierr); 1094 1095 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 1096 if (file) { 1097 fprintf(file,"-matload_block_size %d\n",(int)mat->rmap->bs); 1098 } 1099 PetscFunctionReturn(0); 1100 } 1101 1102 #undef __FUNCT__ 1103 #define __FUNCT__ "MatView_MPISBAIJ" 1104 PetscErrorCode MatView_MPISBAIJ(Mat mat,PetscViewer viewer) 1105 { 1106 PetscErrorCode ierr; 1107 PetscBool iascii,isdraw,issocket,isbinary; 1108 1109 PetscFunctionBegin; 1110 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1111 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1112 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 1113 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1114 if (iascii || isdraw || issocket) { 1115 ierr = MatView_MPISBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1116 } else if (isbinary) { 1117 ierr = MatView_MPISBAIJ_Binary(mat,viewer);CHKERRQ(ierr); 1118 } 1119 PetscFunctionReturn(0); 1120 } 1121 1122 #undef __FUNCT__ 1123 #define __FUNCT__ "MatDestroy_MPISBAIJ" 1124 PetscErrorCode MatDestroy_MPISBAIJ(Mat mat) 1125 { 1126 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 1127 PetscErrorCode ierr; 1128 1129 PetscFunctionBegin; 1130 #if defined(PETSC_USE_LOG) 1131 PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N); 1132 #endif 1133 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 1134 ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr); 1135 ierr = MatDestroy(&baij->A);CHKERRQ(ierr); 1136 ierr = MatDestroy(&baij->B);CHKERRQ(ierr); 1137 #if defined(PETSC_USE_CTABLE) 1138 ierr = PetscTableDestroy(&baij->colmap);CHKERRQ(ierr); 1139 #else 1140 ierr = PetscFree(baij->colmap);CHKERRQ(ierr); 1141 #endif 1142 ierr = PetscFree(baij->garray);CHKERRQ(ierr); 1143 ierr = VecDestroy(&baij->lvec);CHKERRQ(ierr); 1144 ierr = VecScatterDestroy(&baij->Mvctx);CHKERRQ(ierr); 1145 ierr = VecDestroy(&baij->slvec0);CHKERRQ(ierr); 1146 ierr = VecDestroy(&baij->slvec0b);CHKERRQ(ierr); 1147 ierr = VecDestroy(&baij->slvec1);CHKERRQ(ierr); 1148 ierr = VecDestroy(&baij->slvec1a);CHKERRQ(ierr); 1149 ierr = VecDestroy(&baij->slvec1b);CHKERRQ(ierr); 1150 ierr = VecScatterDestroy(&baij->sMvctx);CHKERRQ(ierr); 1151 ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr); 1152 ierr = PetscFree(baij->barray);CHKERRQ(ierr); 1153 ierr = PetscFree(baij->hd);CHKERRQ(ierr); 
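  /* diag, bb1 and xx1 below are work vectors that are only created on demand (for example by the
     relaxation kernels); VecDestroy() is a no-op for any of them that were never created */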
1154 ierr = VecDestroy(&baij->diag);CHKERRQ(ierr); 1155 ierr = VecDestroy(&baij->bb1);CHKERRQ(ierr); 1156 ierr = VecDestroy(&baij->xx1);CHKERRQ(ierr); 1157 #if defined(PETSC_USE_REAL_MAT_SINGLE) 1158 ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr); 1159 #endif 1160 ierr = PetscFree(baij->in_loc);CHKERRQ(ierr); 1161 ierr = PetscFree(baij->v_loc);CHKERRQ(ierr); 1162 ierr = PetscFree(baij->rangebs);CHKERRQ(ierr); 1163 ierr = PetscFree(mat->data);CHKERRQ(ierr); 1164 1165 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 1166 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr); 1167 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr); 1168 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPISBAIJSetPreallocation_C",NULL);CHKERRQ(ierr); 1169 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpisbaij_mpisbstrm_C",NULL);CHKERRQ(ierr); 1170 #if defined(PETSC_HAVE_ELEMENTAL) 1171 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpisbaij_elemental_C",NULL);CHKERRQ(ierr); 1172 #endif 1173 PetscFunctionReturn(0); 1174 } 1175 1176 #undef __FUNCT__ 1177 #define __FUNCT__ "MatMult_MPISBAIJ_Hermitian" 1178 PetscErrorCode MatMult_MPISBAIJ_Hermitian(Mat A,Vec xx,Vec yy) 1179 { 1180 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1181 PetscErrorCode ierr; 1182 PetscInt nt,mbs=a->mbs,bs=A->rmap->bs; 1183 PetscScalar *from; 1184 const PetscScalar *x; 1185 1186 PetscFunctionBegin; 1187 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 1188 if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx"); 1189 1190 /* diagonal part */ 1191 ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr); 1192 ierr = VecSet(a->slvec1b,0.0);CHKERRQ(ierr); 1193 1194 /* subdiagonal part */ 1195 ierr = (*a->B->ops->multhermitiantranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr); 1196 1197 /* copy x into the vec slvec0 */ 1198 ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr); 1199 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1200 1201 ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 1202 ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr); 1203 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1204 1205 ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1206 ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1207 /* supperdiagonal part */ 1208 ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr); 1209 PetscFunctionReturn(0); 1210 } 1211 1212 #undef __FUNCT__ 1213 #define __FUNCT__ "MatMult_MPISBAIJ" 1214 PetscErrorCode MatMult_MPISBAIJ(Mat A,Vec xx,Vec yy) 1215 { 1216 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1217 PetscErrorCode ierr; 1218 PetscInt nt,mbs=a->mbs,bs=A->rmap->bs; 1219 PetscScalar *from; 1220 const PetscScalar *x; 1221 1222 PetscFunctionBegin; 1223 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 1224 if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx"); 1225 1226 /* diagonal part */ 1227 ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr); 1228 ierr = VecSet(a->slvec1b,0.0);CHKERRQ(ierr); 1229 1230 /* subdiagonal part */ 1231 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr); 1232 1233 /* copy x into the vec slvec0 */ 1234 ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr); 1235 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1236 1237 ierr = 
PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
  ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
  ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);

  ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  /* superdiagonal part */
  ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMult_MPISBAIJ_2comm"
PetscErrorCode MatMult_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");

  ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
  if (nt != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and yy");

  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  /* do diagonal part */
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  /* do superdiagonal part */
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  /* do subdiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPISBAIJ"
PetscErrorCode MatMultAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPISBAIJ      *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode    ierr;
  PetscInt          mbs=a->mbs,bs=A->rmap->bs;
  PetscScalar       *from,zero=0.0;
  const PetscScalar *x;

  PetscFunctionBegin;
  /*
  PetscSynchronizedPrintf(PetscObjectComm((PetscObject)A)," MatMultAdd is called ...\n");
  PetscSynchronizedFlush(PetscObjectComm((PetscObject)A),PETSC_STDOUT);
  */
  /* diagonal part */
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,a->slvec1a);CHKERRQ(ierr);
  ierr = VecSet(a->slvec1b,zero);CHKERRQ(ierr);

  /* subdiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);

  /* copy x into the vec slvec0 */
  ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
  ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
  ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
  ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);

  ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

  /* superdiagonal part */
  ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPISBAIJ_2comm"
PetscErrorCode MatMultAdd_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;

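  /* Computes zz = yy + A*xx with the older two-communication scheme used by the code below:
     a forward scatter supplies the halo values for the upper-triangular (off-diagonal) block product,
     and a reverse scatter accumulates the transposed contribution of that block. */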
PetscFunctionBegin; 1321 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1322 /* do diagonal part */ 1323 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1324 /* do supperdiagonal part */ 1325 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1326 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 1327 1328 /* do subdiagonal part */ 1329 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1330 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1331 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1332 PetscFunctionReturn(0); 1333 } 1334 1335 /* 1336 This only works correctly for square matrices where the subblock A->A is the 1337 diagonal block 1338 */ 1339 #undef __FUNCT__ 1340 #define __FUNCT__ "MatGetDiagonal_MPISBAIJ" 1341 PetscErrorCode MatGetDiagonal_MPISBAIJ(Mat A,Vec v) 1342 { 1343 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1344 PetscErrorCode ierr; 1345 1346 PetscFunctionBegin; 1347 /* if (a->rmap->N != a->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */ 1348 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 1349 PetscFunctionReturn(0); 1350 } 1351 1352 #undef __FUNCT__ 1353 #define __FUNCT__ "MatScale_MPISBAIJ" 1354 PetscErrorCode MatScale_MPISBAIJ(Mat A,PetscScalar aa) 1355 { 1356 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1357 PetscErrorCode ierr; 1358 1359 PetscFunctionBegin; 1360 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 1361 ierr = MatScale(a->B,aa);CHKERRQ(ierr); 1362 PetscFunctionReturn(0); 1363 } 1364 1365 #undef __FUNCT__ 1366 #define __FUNCT__ "MatGetRow_MPISBAIJ" 1367 PetscErrorCode MatGetRow_MPISBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1368 { 1369 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data; 1370 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1371 PetscErrorCode ierr; 1372 PetscInt bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB; 1373 PetscInt nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend; 1374 PetscInt *cmap,*idx_p,cstart = mat->rstartbs; 1375 1376 PetscFunctionBegin; 1377 if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1378 mat->getrowactive = PETSC_TRUE; 1379 1380 if (!mat->rowvalues && (idx || v)) { 1381 /* 1382 allocate enough space to hold information from the longest row. 
1383 */ 1384 Mat_SeqSBAIJ *Aa = (Mat_SeqSBAIJ*)mat->A->data; 1385 Mat_SeqBAIJ *Ba = (Mat_SeqBAIJ*)mat->B->data; 1386 PetscInt max = 1,mbs = mat->mbs,tmp; 1387 for (i=0; i<mbs; i++) { 1388 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; /* row length */ 1389 if (max < tmp) max = tmp; 1390 } 1391 ierr = PetscMalloc2(max*bs2,&mat->rowvalues,max*bs2,&mat->rowindices);CHKERRQ(ierr); 1392 } 1393 1394 if (row < brstart || row >= brend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows"); 1395 lrow = row - brstart; /* local row index */ 1396 1397 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1398 if (!v) {pvA = 0; pvB = 0;} 1399 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1400 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1401 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1402 nztot = nzA + nzB; 1403 1404 cmap = mat->garray; 1405 if (v || idx) { 1406 if (nztot) { 1407 /* Sort by increasing column numbers, assuming A and B already sorted */ 1408 PetscInt imark = -1; 1409 if (v) { 1410 *v = v_p = mat->rowvalues; 1411 for (i=0; i<nzB; i++) { 1412 if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i]; 1413 else break; 1414 } 1415 imark = i; 1416 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1417 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1418 } 1419 if (idx) { 1420 *idx = idx_p = mat->rowindices; 1421 if (imark > -1) { 1422 for (i=0; i<imark; i++) { 1423 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs; 1424 } 1425 } else { 1426 for (i=0; i<nzB; i++) { 1427 if (cmap[cworkB[i]/bs] < cstart) idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs; 1428 else break; 1429 } 1430 imark = i; 1431 } 1432 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart*bs + cworkA[i]; 1433 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1434 } 1435 } else { 1436 if (idx) *idx = 0; 1437 if (v) *v = 0; 1438 } 1439 } 1440 *nz = nztot; 1441 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1442 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1443 PetscFunctionReturn(0); 1444 } 1445 1446 #undef __FUNCT__ 1447 #define __FUNCT__ "MatRestoreRow_MPISBAIJ" 1448 PetscErrorCode MatRestoreRow_MPISBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1449 { 1450 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 1451 1452 PetscFunctionBegin; 1453 if (!baij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1454 baij->getrowactive = PETSC_FALSE; 1455 PetscFunctionReturn(0); 1456 } 1457 1458 #undef __FUNCT__ 1459 #define __FUNCT__ "MatGetRowUpperTriangular_MPISBAIJ" 1460 PetscErrorCode MatGetRowUpperTriangular_MPISBAIJ(Mat A) 1461 { 1462 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1463 Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data; 1464 1465 PetscFunctionBegin; 1466 aA->getrow_utriangular = PETSC_TRUE; 1467 PetscFunctionReturn(0); 1468 } 1469 #undef __FUNCT__ 1470 #define __FUNCT__ "MatRestoreRowUpperTriangular_MPISBAIJ" 1471 PetscErrorCode MatRestoreRowUpperTriangular_MPISBAIJ(Mat A) 1472 { 1473 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1474 Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data; 1475 1476 PetscFunctionBegin; 1477 aA->getrow_utriangular = PETSC_FALSE; 1478 PetscFunctionReturn(0); 1479 } 1480 1481 #undef __FUNCT__ 1482 #define __FUNCT__ "MatRealPart_MPISBAIJ" 1483 PetscErrorCode MatRealPart_MPISBAIJ(Mat A) 1484 { 1485 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1486 PetscErrorCode ierr; 1487 1488 PetscFunctionBegin; 1489 ierr = 
MatRealPart(a->A);CHKERRQ(ierr); 1490 ierr = MatRealPart(a->B);CHKERRQ(ierr); 1491 PetscFunctionReturn(0); 1492 } 1493 1494 #undef __FUNCT__ 1495 #define __FUNCT__ "MatImaginaryPart_MPISBAIJ" 1496 PetscErrorCode MatImaginaryPart_MPISBAIJ(Mat A) 1497 { 1498 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1499 PetscErrorCode ierr; 1500 1501 PetscFunctionBegin; 1502 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 1503 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 1504 PetscFunctionReturn(0); 1505 } 1506 1507 /* Check if isrow is a subset of iscol_local, called by MatGetSubMatrix_MPISBAIJ() 1508 Input: isrow - distributed(parallel), 1509 iscol_local - locally owned (seq) 1510 */ 1511 #undef __FUNCT__ 1512 #define __FUNCT__ "ISEqual_private" 1513 PetscErrorCode ISEqual_private(IS isrow,IS iscol_local,PetscBool *flg) 1514 { 1515 PetscErrorCode ierr; 1516 PetscInt sz1,sz2,*a1,*a2,i,j,k,nmatch; 1517 const PetscInt *ptr1,*ptr2; 1518 1519 PetscFunctionBegin; 1520 ierr = ISGetLocalSize(isrow,&sz1);CHKERRQ(ierr); 1521 ierr = ISGetLocalSize(iscol_local,&sz2);CHKERRQ(ierr); 1522 if (sz1 > sz2) { 1523 *flg = PETSC_FALSE; 1524 PetscFunctionReturn(0); 1525 } 1526 1527 ierr = ISGetIndices(isrow,&ptr1);CHKERRQ(ierr); 1528 ierr = ISGetIndices(iscol_local,&ptr2);CHKERRQ(ierr); 1529 1530 ierr = PetscMalloc1(sz1,&a1);CHKERRQ(ierr); 1531 ierr = PetscMalloc1(sz2,&a2);CHKERRQ(ierr); 1532 ierr = PetscMemcpy(a1,ptr1,sz1*sizeof(PetscInt));CHKERRQ(ierr); 1533 ierr = PetscMemcpy(a2,ptr2,sz2*sizeof(PetscInt));CHKERRQ(ierr); 1534 ierr = PetscSortInt(sz1,a1);CHKERRQ(ierr); 1535 ierr = PetscSortInt(sz2,a2);CHKERRQ(ierr); 1536 1537 nmatch=0; 1538 k = 0; 1539 for (i=0; i<sz1; i++){ 1540 for (j=k; j<sz2; j++){ 1541 if (a1[i] == a2[j]) { 1542 k = j; nmatch++; 1543 break; 1544 } 1545 } 1546 } 1547 ierr = ISRestoreIndices(isrow,&ptr1);CHKERRQ(ierr); 1548 ierr = ISRestoreIndices(iscol_local,&ptr2);CHKERRQ(ierr); 1549 ierr = PetscFree(a1);CHKERRQ(ierr); 1550 ierr = PetscFree(a2);CHKERRQ(ierr); 1551 if (nmatch < sz1) { 1552 *flg = PETSC_FALSE; 1553 } else { 1554 *flg = PETSC_TRUE; 1555 } 1556 PetscFunctionReturn(0); 1557 } 1558 1559 #undef __FUNCT__ 1560 #define __FUNCT__ "MatGetSubMatrix_MPISBAIJ" 1561 PetscErrorCode MatGetSubMatrix_MPISBAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 1562 { 1563 PetscErrorCode ierr; 1564 IS iscol_local; 1565 PetscInt csize; 1566 PetscBool isequal; 1567 1568 PetscFunctionBegin; 1569 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 1570 if (call == MAT_REUSE_MATRIX) { 1571 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 1572 if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 1573 } else { 1574 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 1575 ierr = ISEqual_private(isrow,iscol_local,&isequal);CHKERRQ(ierr); 1576 if (!isequal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"For symmetric format, iscol must equal isrow"); 1577 } 1578 1579 /* now call MatGetSubMatrix_MPIBAIJ() */ 1580 ierr = MatGetSubMatrix_MPIBAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 1581 if (call == MAT_INITIAL_MATRIX) { 1582 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 1583 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 1584 } 1585 PetscFunctionReturn(0); 1586 } 1587 1588 #undef __FUNCT__ 1589 #define __FUNCT__ "MatZeroEntries_MPISBAIJ" 1590 PetscErrorCode MatZeroEntries_MPISBAIJ(Mat A) 1591 { 1592 Mat_MPISBAIJ *l = 
(Mat_MPISBAIJ*)A->data; 1593 PetscErrorCode ierr; 1594 1595 PetscFunctionBegin; 1596 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 1597 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 1598 PetscFunctionReturn(0); 1599 } 1600 1601 #undef __FUNCT__ 1602 #define __FUNCT__ "MatGetInfo_MPISBAIJ" 1603 PetscErrorCode MatGetInfo_MPISBAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1604 { 1605 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)matin->data; 1606 Mat A = a->A,B = a->B; 1607 PetscErrorCode ierr; 1608 PetscReal isend[5],irecv[5]; 1609 1610 PetscFunctionBegin; 1611 info->block_size = (PetscReal)matin->rmap->bs; 1612 1613 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1614 1615 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1616 isend[3] = info->memory; isend[4] = info->mallocs; 1617 1618 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1619 1620 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1621 isend[3] += info->memory; isend[4] += info->mallocs; 1622 if (flag == MAT_LOCAL) { 1623 info->nz_used = isend[0]; 1624 info->nz_allocated = isend[1]; 1625 info->nz_unneeded = isend[2]; 1626 info->memory = isend[3]; 1627 info->mallocs = isend[4]; 1628 } else if (flag == MAT_GLOBAL_MAX) { 1629 ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1630 1631 info->nz_used = irecv[0]; 1632 info->nz_allocated = irecv[1]; 1633 info->nz_unneeded = irecv[2]; 1634 info->memory = irecv[3]; 1635 info->mallocs = irecv[4]; 1636 } else if (flag == MAT_GLOBAL_SUM) { 1637 ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1638 1639 info->nz_used = irecv[0]; 1640 info->nz_allocated = irecv[1]; 1641 info->nz_unneeded = irecv[2]; 1642 info->memory = irecv[3]; 1643 info->mallocs = irecv[4]; 1644 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag); 1645 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1646 info->fill_ratio_needed = 0; 1647 info->factor_mallocs = 0; 1648 PetscFunctionReturn(0); 1649 } 1650 1651 #undef __FUNCT__ 1652 #define __FUNCT__ "MatSetOption_MPISBAIJ" 1653 PetscErrorCode MatSetOption_MPISBAIJ(Mat A,MatOption op,PetscBool flg) 1654 { 1655 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1656 Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data; 1657 PetscErrorCode ierr; 1658 1659 PetscFunctionBegin; 1660 switch (op) { 1661 case MAT_NEW_NONZERO_LOCATIONS: 1662 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1663 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1664 case MAT_KEEP_NONZERO_PATTERN: 1665 case MAT_NEW_NONZERO_LOCATION_ERR: 1666 MatCheckPreallocated(A,1); 1667 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1668 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1669 break; 1670 case MAT_ROW_ORIENTED: 1671 MatCheckPreallocated(A,1); 1672 a->roworiented = flg; 1673 1674 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1675 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1676 break; 1677 case MAT_NEW_DIAGONALS: 1678 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1679 break; 1680 case MAT_IGNORE_OFF_PROC_ENTRIES: 1681 a->donotstash = flg; 1682 break; 1683 case MAT_USE_HASH_TABLE: 1684 a->ht_flag = flg; 1685 break; 1686 case MAT_HERMITIAN: 1687 MatCheckPreallocated(A,1); 1688 if (!A->assembled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call MatAssemblyEnd() first"); 1689 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1690 1691 A->ops->mult = 
MatMult_MPISBAIJ_Hermitian; 1692 break; 1693 case MAT_SPD: 1694 A->spd_set = PETSC_TRUE; 1695 A->spd = flg; 1696 if (flg) { 1697 A->symmetric = PETSC_TRUE; 1698 A->structurally_symmetric = PETSC_TRUE; 1699 A->symmetric_set = PETSC_TRUE; 1700 A->structurally_symmetric_set = PETSC_TRUE; 1701 } 1702 break; 1703 case MAT_SYMMETRIC: 1704 MatCheckPreallocated(A,1); 1705 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1706 break; 1707 case MAT_STRUCTURALLY_SYMMETRIC: 1708 MatCheckPreallocated(A,1); 1709 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1710 break; 1711 case MAT_SYMMETRY_ETERNAL: 1712 if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Matrix must be symmetric"); 1713 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1714 break; 1715 case MAT_IGNORE_LOWER_TRIANGULAR: 1716 aA->ignore_ltriangular = flg; 1717 break; 1718 case MAT_ERROR_LOWER_TRIANGULAR: 1719 aA->ignore_ltriangular = flg; 1720 break; 1721 case MAT_GETROW_UPPERTRIANGULAR: 1722 aA->getrow_utriangular = flg; 1723 break; 1724 default: 1725 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1726 } 1727 PetscFunctionReturn(0); 1728 } 1729 1730 #undef __FUNCT__ 1731 #define __FUNCT__ "MatTranspose_MPISBAIJ" 1732 PetscErrorCode MatTranspose_MPISBAIJ(Mat A,MatReuse reuse,Mat *B) 1733 { 1734 PetscErrorCode ierr; 1735 1736 PetscFunctionBegin; 1737 if (reuse == MAT_INITIAL_MATRIX || *B != A) { /* symmetric matrix: the transpose is a copy; skip only for in-place reuse */ 1738 ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr); 1739 } 1740 PetscFunctionReturn(0); 1741 } 1742 1743 #undef __FUNCT__ 1744 #define __FUNCT__ "MatDiagonalScale_MPISBAIJ" 1745 PetscErrorCode MatDiagonalScale_MPISBAIJ(Mat mat,Vec ll,Vec rr) 1746 { 1747 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 1748 Mat a = baij->A, b=baij->B; 1749 PetscErrorCode ierr; 1750 PetscInt nv,m,n; 1751 PetscBool flg; 1752 1753 PetscFunctionBegin; 1754 if (ll != rr) { 1755 ierr = VecEqual(ll,rr,&flg);CHKERRQ(ierr); 1756 if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be same\n"); 1757 } 1758 if (!ll) PetscFunctionReturn(0); 1759 1760 ierr = MatGetLocalSize(mat,&m,&n);CHKERRQ(ierr); 1761 if (m != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"For symmetric format, local size %d %d must be same",m,n); 1762 1763 ierr = VecGetLocalSize(rr,&nv);CHKERRQ(ierr); 1764 if (nv!=n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left and right vector non-conforming local size"); 1765 1766 ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1767 1768 /* left diagonalscale the off-diagonal part */ 1769 ierr = (*b->ops->diagonalscale)(b,ll,NULL);CHKERRQ(ierr); 1770 1771 /* scale the diagonal part */ 1772 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1773 1774 /* right diagonalscale the off-diagonal part */ 1775 ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1776 ierr = (*b->ops->diagonalscale)(b,NULL,baij->lvec);CHKERRQ(ierr); 1777 PetscFunctionReturn(0); 1778 } 1779 1780 #undef __FUNCT__ 1781 #define __FUNCT__ "MatSetUnfactored_MPISBAIJ" 1782 PetscErrorCode MatSetUnfactored_MPISBAIJ(Mat A) 1783 { 1784 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1785 PetscErrorCode ierr; 1786 1787 PetscFunctionBegin; 1788 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1789 PetscFunctionReturn(0); 1790 } 1791 1792 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat,MatDuplicateOption,Mat*); 1793 1794 #undef __FUNCT__ 1795 #define __FUNCT__ "MatEqual_MPISBAIJ" 1796 PetscErrorCode MatEqual_MPISBAIJ(Mat A,Mat
B,PetscBool *flag) 1797 { 1798 Mat_MPISBAIJ *matB = (Mat_MPISBAIJ*)B->data,*matA = (Mat_MPISBAIJ*)A->data; 1799 Mat a,b,c,d; 1800 PetscBool flg; 1801 PetscErrorCode ierr; 1802 1803 PetscFunctionBegin; 1804 a = matA->A; b = matA->B; 1805 c = matB->A; d = matB->B; 1806 1807 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1808 if (flg) { 1809 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1810 } 1811 ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 1812 PetscFunctionReturn(0); 1813 } 1814 1815 #undef __FUNCT__ 1816 #define __FUNCT__ "MatCopy_MPISBAIJ" 1817 PetscErrorCode MatCopy_MPISBAIJ(Mat A,Mat B,MatStructure str) 1818 { 1819 PetscErrorCode ierr; 1820 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1821 Mat_MPISBAIJ *b = (Mat_MPISBAIJ*)B->data; 1822 1823 PetscFunctionBegin; 1824 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 1825 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 1826 ierr = MatGetRowUpperTriangular(A);CHKERRQ(ierr); 1827 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1828 ierr = MatRestoreRowUpperTriangular(A);CHKERRQ(ierr); 1829 } else { 1830 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 1831 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 1832 } 1833 PetscFunctionReturn(0); 1834 } 1835 1836 #undef __FUNCT__ 1837 #define __FUNCT__ "MatSetUp_MPISBAIJ" 1838 PetscErrorCode MatSetUp_MPISBAIJ(Mat A) 1839 { 1840 PetscErrorCode ierr; 1841 1842 PetscFunctionBegin; 1843 ierr = MatMPISBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1844 PetscFunctionReturn(0); 1845 } 1846 1847 #undef __FUNCT__ 1848 #define __FUNCT__ "MatAXPY_MPISBAIJ" 1849 PetscErrorCode MatAXPY_MPISBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 1850 { 1851 PetscErrorCode ierr; 1852 Mat_MPISBAIJ *xx=(Mat_MPISBAIJ*)X->data,*yy=(Mat_MPISBAIJ*)Y->data; 1853 PetscBLASInt bnz,one=1; 1854 Mat_SeqSBAIJ *xa,*ya; 1855 Mat_SeqBAIJ *xb,*yb; 1856 1857 PetscFunctionBegin; 1858 if (str == SAME_NONZERO_PATTERN) { 1859 PetscScalar alpha = a; 1860 xa = (Mat_SeqSBAIJ*)xx->A->data; 1861 ya = (Mat_SeqSBAIJ*)yy->A->data; 1862 ierr = PetscBLASIntCast(xa->nz,&bnz);CHKERRQ(ierr); 1863 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,xa->a,&one,ya->a,&one)); 1864 xb = (Mat_SeqBAIJ*)xx->B->data; 1865 yb = (Mat_SeqBAIJ*)yy->B->data; 1866 ierr = PetscBLASIntCast(xb->nz,&bnz);CHKERRQ(ierr); 1867 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,xb->a,&one,yb->a,&one)); 1868 ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr); 1869 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 1870 ierr = MatSetOption(X,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE);CHKERRQ(ierr); 1871 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 1872 ierr = MatSetOption(X,MAT_GETROW_UPPERTRIANGULAR,PETSC_FALSE);CHKERRQ(ierr); 1873 } else { 1874 Mat B; 1875 PetscInt *nnz_d,*nnz_o,bs=Y->rmap->bs; 1876 if (bs != X->rmap->bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrices must have same block size"); 1877 ierr = MatGetRowUpperTriangular(X);CHKERRQ(ierr); 1878 ierr = MatGetRowUpperTriangular(Y);CHKERRQ(ierr); 1879 ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr); 1880 ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr); 1881 ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr); 1882 ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 1883 ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr); 1884 ierr = 
MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr); 1885 ierr = MatSetType(B,MATMPISBAIJ);CHKERRQ(ierr); 1886 ierr = MatAXPYGetPreallocation_SeqSBAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr); 1887 ierr = MatAXPYGetPreallocation_MPIBAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr); 1888 ierr = MatMPISBAIJSetPreallocation(B,bs,0,nnz_d,0,nnz_o);CHKERRQ(ierr); 1889 ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 1890 ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr); 1891 ierr = PetscFree(nnz_d);CHKERRQ(ierr); 1892 ierr = PetscFree(nnz_o);CHKERRQ(ierr); 1893 ierr = MatRestoreRowUpperTriangular(X);CHKERRQ(ierr); 1894 ierr = MatRestoreRowUpperTriangular(Y);CHKERRQ(ierr); 1895 } 1896 PetscFunctionReturn(0); 1897 } 1898 1899 #undef __FUNCT__ 1900 #define __FUNCT__ "MatGetSubMatrices_MPISBAIJ" 1901 PetscErrorCode MatGetSubMatrices_MPISBAIJ(Mat A,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat *B[]) 1902 { 1903 PetscErrorCode ierr; 1904 PetscInt i; 1905 PetscBool flg; 1906 1907 PetscFunctionBegin; 1908 ierr = MatGetSubMatrices_MPIBAIJ(A,n,irow,icol,scall,B);CHKERRQ(ierr); /* B[] are sbaij matrices */ 1909 for (i=0; i<n; i++) { 1910 ierr = ISEqual(irow[i],icol[i],&flg);CHKERRQ(ierr); 1911 if (!flg) { 1912 ierr = MatSeqSBAIJZeroOps_Private(*B[i]);CHKERRQ(ierr); 1913 } 1914 } 1915 PetscFunctionReturn(0); 1916 } 1917 1918 #undef __FUNCT__ 1919 #define __FUNCT__ "MatShift_MPISBAIJ" 1920 PetscErrorCode MatShift_MPISBAIJ(Mat Y,PetscScalar a) 1921 { 1922 PetscErrorCode ierr; 1923 Mat_MPISBAIJ *maij = (Mat_MPISBAIJ*)Y->data; 1924 Mat_SeqSBAIJ *aij = (Mat_SeqSBAIJ*)maij->A->data; 1925 1926 PetscFunctionBegin; 1927 if (!Y->preallocated) { 1928 ierr = MatMPISBAIJSetPreallocation(Y,Y->rmap->bs,1,NULL,0,NULL);CHKERRQ(ierr); 1929 } else if (!aij->nz) { 1930 PetscInt nonew = aij->nonew; 1931 ierr = MatSeqSBAIJSetPreallocation(maij->A,Y->rmap->bs,1,NULL);CHKERRQ(ierr); 1932 aij->nonew = nonew; 1933 } 1934 ierr = MatShift_Basic(Y,a);CHKERRQ(ierr); 1935 PetscFunctionReturn(0); 1936 } 1937 1938 #undef __FUNCT__ 1939 #define __FUNCT__ "MatMissingDiagonal_MPISBAIJ" 1940 PetscErrorCode MatMissingDiagonal_MPISBAIJ(Mat A,PetscBool *missing,PetscInt *d) 1941 { 1942 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1943 PetscErrorCode ierr; 1944 1945 PetscFunctionBegin; 1946 if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices"); 1947 ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr); 1948 if (d) { 1949 PetscInt rstart; 1950 ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr); 1951 *d += rstart/A->rmap->bs; 1952 1953 } 1954 PetscFunctionReturn(0); 1955 } 1956 1957 #undef __FUNCT__ 1958 #define __FUNCT__ "MatGetDiagonalBlock_MPISBAIJ" 1959 PetscErrorCode MatGetDiagonalBlock_MPISBAIJ(Mat A,Mat *a) 1960 { 1961 PetscFunctionBegin; 1962 *a = ((Mat_MPISBAIJ*)A->data)->A; 1963 PetscFunctionReturn(0); 1964 } 1965 1966 /* -------------------------------------------------------------------*/ 1967 static struct _MatOps MatOps_Values = {MatSetValues_MPISBAIJ, 1968 MatGetRow_MPISBAIJ, 1969 MatRestoreRow_MPISBAIJ, 1970 MatMult_MPISBAIJ, 1971 /* 4*/ MatMultAdd_MPISBAIJ, 1972 MatMult_MPISBAIJ, /* transpose versions are same as non-transpose */ 1973 MatMultAdd_MPISBAIJ, 1974 0, 1975 0, 1976 0, 1977 /* 10*/ 0, 1978 0, 1979 0, 1980 MatSOR_MPISBAIJ, 1981 MatTranspose_MPISBAIJ, 1982 /* 15*/ MatGetInfo_MPISBAIJ, 1983 MatEqual_MPISBAIJ, 1984 MatGetDiagonal_MPISBAIJ, 1985 MatDiagonalScale_MPISBAIJ, 1986 MatNorm_MPISBAIJ, 1987 /* 20*/ MatAssemblyBegin_MPISBAIJ, 1988 
MatAssemblyEnd_MPISBAIJ, 1989 MatSetOption_MPISBAIJ, 1990 MatZeroEntries_MPISBAIJ, 1991 /* 24*/ 0, 1992 0, 1993 0, 1994 0, 1995 0, 1996 /* 29*/ MatSetUp_MPISBAIJ, 1997 0, 1998 0, 1999 MatGetDiagonalBlock_MPISBAIJ, 2000 0, 2001 /* 34*/ MatDuplicate_MPISBAIJ, 2002 0, 2003 0, 2004 0, 2005 0, 2006 /* 39*/ MatAXPY_MPISBAIJ, 2007 MatGetSubMatrices_MPISBAIJ, 2008 MatIncreaseOverlap_MPISBAIJ, 2009 MatGetValues_MPISBAIJ, 2010 MatCopy_MPISBAIJ, 2011 /* 44*/ 0, 2012 MatScale_MPISBAIJ, 2013 MatShift_MPISBAIJ, 2014 0, 2015 0, 2016 /* 49*/ 0, 2017 0, 2018 0, 2019 0, 2020 0, 2021 /* 54*/ 0, 2022 0, 2023 MatSetUnfactored_MPISBAIJ, 2024 0, 2025 MatSetValuesBlocked_MPISBAIJ, 2026 /* 59*/ MatGetSubMatrix_MPISBAIJ, 2027 0, 2028 0, 2029 0, 2030 0, 2031 /* 64*/ 0, 2032 0, 2033 0, 2034 0, 2035 0, 2036 /* 69*/ MatGetRowMaxAbs_MPISBAIJ, 2037 0, 2038 0, 2039 0, 2040 0, 2041 /* 74*/ 0, 2042 0, 2043 0, 2044 0, 2045 0, 2046 /* 79*/ 0, 2047 0, 2048 0, 2049 0, 2050 MatLoad_MPISBAIJ, 2051 /* 84*/ 0, 2052 0, 2053 0, 2054 0, 2055 0, 2056 /* 89*/ 0, 2057 0, 2058 0, 2059 0, 2060 0, 2061 /* 94*/ 0, 2062 0, 2063 0, 2064 0, 2065 0, 2066 /* 99*/ 0, 2067 0, 2068 0, 2069 0, 2070 0, 2071 /*104*/ 0, 2072 MatRealPart_MPISBAIJ, 2073 MatImaginaryPart_MPISBAIJ, 2074 MatGetRowUpperTriangular_MPISBAIJ, 2075 MatRestoreRowUpperTriangular_MPISBAIJ, 2076 /*109*/ 0, 2077 0, 2078 0, 2079 0, 2080 MatMissingDiagonal_MPISBAIJ, 2081 /*114*/ 0, 2082 0, 2083 0, 2084 0, 2085 0, 2086 /*119*/ 0, 2087 0, 2088 0, 2089 0, 2090 0, 2091 /*124*/ 0, 2092 0, 2093 0, 2094 0, 2095 0, 2096 /*129*/ 0, 2097 0, 2098 0, 2099 0, 2100 0, 2101 /*134*/ 0, 2102 0, 2103 0, 2104 0, 2105 0, 2106 /*139*/ 0, 2107 0, 2108 0, 2109 0, 2110 0, 2111 /*144*/MatCreateMPIMatConcatenateSeqMat_MPISBAIJ 2112 }; 2113 2114 #undef __FUNCT__ 2115 #define __FUNCT__ "MatMPISBAIJSetPreallocation_MPISBAIJ" 2116 PetscErrorCode MatMPISBAIJSetPreallocation_MPISBAIJ(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt *d_nnz,PetscInt o_nz,const PetscInt *o_nnz) 2117 { 2118 Mat_MPISBAIJ *b; 2119 PetscErrorCode ierr; 2120 PetscInt i,mbs,Mbs; 2121 2122 PetscFunctionBegin; 2123 ierr = MatSetBlockSize(B,PetscAbs(bs));CHKERRQ(ierr); 2124 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2125 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2126 ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr); 2127 2128 b = (Mat_MPISBAIJ*)B->data; 2129 mbs = B->rmap->n/bs; 2130 Mbs = B->rmap->N/bs; 2131 if (mbs*bs != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"No of local rows %D must be divisible by blocksize %D",B->rmap->N,bs); 2132 2133 B->rmap->bs = bs; 2134 b->bs2 = bs*bs; 2135 b->mbs = mbs; 2136 b->Mbs = Mbs; 2137 b->nbs = B->cmap->n/bs; 2138 b->Nbs = B->cmap->N/bs; 2139 2140 for (i=0; i<=b->size; i++) { 2141 b->rangebs[i] = B->rmap->range[i]/bs; 2142 } 2143 b->rstartbs = B->rmap->rstart/bs; 2144 b->rendbs = B->rmap->rend/bs; 2145 2146 b->cstartbs = B->cmap->rstart/bs; 2147 b->cendbs = B->cmap->rend/bs; 2148 2149 if (!B->preallocated) { 2150 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 2151 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 2152 ierr = MatSetType(b->A,MATSEQSBAIJ);CHKERRQ(ierr); 2153 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr); 2154 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 2155 ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 2156 ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr); 2157 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr); 2158 
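/* Note on the layout set up above: b->A holds the local diagonal block in sequential SBAIJ format, so only its
   upper triangle is stored, while b->B holds the off-diagonal part in sequential BAIJ format over the full global
   column space. The block stash created below buffers values destined for rows owned by other processes until
   MatAssemblyBegin()/MatAssemblyEnd(). */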
ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),bs,&B->bstash);CHKERRQ(ierr); 2159 } 2160 2161 ierr = MatSeqSBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr); 2162 ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr); 2163 2164 B->preallocated = PETSC_TRUE; 2165 PetscFunctionReturn(0); 2166 } 2167 2168 #undef __FUNCT__ 2169 #define __FUNCT__ "MatMPISBAIJSetPreallocationCSR_MPISBAIJ" 2170 PetscErrorCode MatMPISBAIJSetPreallocationCSR_MPISBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[]) 2171 { 2172 PetscInt m,rstart,cstart,cend; 2173 PetscInt i,j,d,nz,nz_max=0,*d_nnz=0,*o_nnz=0; 2174 const PetscInt *JJ =0; 2175 PetscScalar *values=0; 2176 PetscErrorCode ierr; 2177 2178 PetscFunctionBegin; 2179 if (bs < 1) SETERRQ1(PetscObjectComm((PetscObject)B),PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs); 2180 ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 2181 ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 2182 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2183 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2184 ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr); 2185 m = B->rmap->n/bs; 2186 rstart = B->rmap->rstart/bs; 2187 cstart = B->cmap->rstart/bs; 2188 cend = B->cmap->rend/bs; 2189 2190 if (ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %D",ii[0]); 2191 ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr); 2192 for (i=0; i<m; i++) { 2193 nz = ii[i+1] - ii[i]; 2194 if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nz); 2195 nz_max = PetscMax(nz_max,nz); 2196 JJ = jj + ii[i]; 2197 for (j=0; j<nz; j++) { 2198 if (*JJ >= cstart) break; 2199 JJ++; 2200 } 2201 d = 0; 2202 for (; j<nz; j++) { 2203 if (*JJ++ >= cend) break; 2204 d++; 2205 } 2206 d_nnz[i] = d; 2207 o_nnz[i] = nz - d; 2208 } 2209 ierr = MatMPISBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 2210 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 2211 2212 values = (PetscScalar*)V; 2213 if (!values) { 2214 ierr = PetscMalloc1(bs*bs*nz_max,&values);CHKERRQ(ierr); 2215 ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr); 2216 } 2217 for (i=0; i<m; i++) { 2218 PetscInt row = i + rstart; 2219 PetscInt ncols = ii[i+1] - ii[i]; 2220 const PetscInt *icols = jj + ii[i]; 2221 const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0); 2222 ierr = MatSetValuesBlocked_MPISBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr); 2223 } 2224 2225 if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); } 2226 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2227 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2228 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 2229 PetscFunctionReturn(0); 2230 } 2231 2232 /*MC 2233 MATMPISBAIJ - MATMPISBAIJ = "mpisbaij" - A matrix type to be used for distributed symmetric sparse block matrices, 2234 based on block compressed sparse row format. Only the upper triangular portion of the "diagonal" portion of 2235 the matrix is stored. 2236 2237 For complex numbers by default this matrix is symmetric, NOT Hermitian symmetric. To make it Hermitian symmetric you 2238 can call MatSetOption(Mat, MAT_HERMITIAN); 2239 2240 Options Database Keys: 2241 . 
-mat_type mpisbaij - sets the matrix type to "mpisbaij" during a call to MatSetFromOptions() 2242 2243 Level: beginner 2244 2245 .seealso: MatCreateMPISBAIJ 2246 M*/ 2247 2248 PETSC_INTERN PetscErrorCode MatConvert_MPISBAIJ_MPISBSTRM(Mat,MatType,MatReuse,Mat*); 2249 2250 #undef __FUNCT__ 2251 #define __FUNCT__ "MatCreate_MPISBAIJ" 2252 PETSC_EXTERN PetscErrorCode MatCreate_MPISBAIJ(Mat B) 2253 { 2254 Mat_MPISBAIJ *b; 2255 PetscErrorCode ierr; 2256 PetscBool flg = PETSC_FALSE; 2257 2258 PetscFunctionBegin; 2259 ierr = PetscNewLog(B,&b);CHKERRQ(ierr); 2260 B->data = (void*)b; 2261 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 2262 2263 B->ops->destroy = MatDestroy_MPISBAIJ; 2264 B->ops->view = MatView_MPISBAIJ; 2265 B->assembled = PETSC_FALSE; 2266 B->insertmode = NOT_SET_VALUES; 2267 2268 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr); 2269 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&b->size);CHKERRQ(ierr); 2270 2271 /* build local table of row and column ownerships */ 2272 ierr = PetscMalloc1(b->size+2,&b->rangebs);CHKERRQ(ierr); 2273 2274 /* build cache for off array entries formed */ 2275 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr); 2276 2277 b->donotstash = PETSC_FALSE; 2278 b->colmap = NULL; 2279 b->garray = NULL; 2280 b->roworiented = PETSC_TRUE; 2281 2282 /* stuff used in block assembly */ 2283 b->barray = 0; 2284 2285 /* stuff used for matrix vector multiply */ 2286 b->lvec = 0; 2287 b->Mvctx = 0; 2288 b->slvec0 = 0; 2289 b->slvec0b = 0; 2290 b->slvec1 = 0; 2291 b->slvec1a = 0; 2292 b->slvec1b = 0; 2293 b->sMvctx = 0; 2294 2295 /* stuff for MatGetRow() */ 2296 b->rowindices = 0; 2297 b->rowvalues = 0; 2298 b->getrowactive = PETSC_FALSE; 2299 2300 /* hash table stuff */ 2301 b->ht = 0; 2302 b->hd = 0; 2303 b->ht_size = 0; 2304 b->ht_flag = PETSC_FALSE; 2305 b->ht_fact = 0; 2306 b->ht_total_ct = 0; 2307 b->ht_insert_ct = 0; 2308 2309 /* stuff for MatGetSubMatrices_MPIBAIJ_local() */ 2310 b->ijonly = PETSC_FALSE; 2311 2312 b->in_loc = 0; 2313 b->v_loc = 0; 2314 b->n_loc = 0; 2315 2316 ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPISBAIJ);CHKERRQ(ierr); 2317 ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPISBAIJ);CHKERRQ(ierr); 2318 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPISBAIJSetPreallocation_C",MatMPISBAIJSetPreallocation_MPISBAIJ);CHKERRQ(ierr); 2319 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPISBAIJSetPreallocationCSR_C",MatMPISBAIJSetPreallocationCSR_MPISBAIJ);CHKERRQ(ierr); 2320 #if defined(PETSC_HAVE_ELEMENTAL) 2321 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpisbaij_elemental_C",MatConvert_MPISBAIJ_Elemental);CHKERRQ(ierr); 2322 #endif 2323 2324 B->symmetric = PETSC_TRUE; 2325 B->structurally_symmetric = PETSC_TRUE; 2326 B->symmetric_set = PETSC_TRUE; 2327 B->structurally_symmetric_set = PETSC_TRUE; 2328 2329 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPISBAIJ);CHKERRQ(ierr); 2330 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Options for loading MPISBAIJ matrix 1","Mat");CHKERRQ(ierr); 2331 ierr = PetscOptionsBool("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",flg,&flg,NULL);CHKERRQ(ierr); 2332 if (flg) { 2333 PetscReal fact = 1.39; 2334 ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr); 2335 ierr = PetscOptionsReal("-mat_use_hash_table","Use hash 
table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,NULL);CHKERRQ(ierr); 2336 if (fact <= 1.0) fact = 1.39; 2337 ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr); 2338 ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr); 2339 } 2340 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2341 PetscFunctionReturn(0); 2342 } 2343 2344 /*MC 2345 MATSBAIJ - MATSBAIJ = "sbaij" - A matrix type to be used for symmetric block sparse matrices. 2346 2347 This matrix type is identical to MATSEQSBAIJ when constructed with a single process communicator, 2348 and MATMPISBAIJ otherwise. 2349 2350 Options Database Keys: 2351 . -mat_type sbaij - sets the matrix type to "sbaij" during a call to MatSetFromOptions() 2352 2353 Level: beginner 2354 2355 .seealso: MatCreateMPISBAIJ,MATSEQSBAIJ,MATMPISBAIJ 2356 M*/ 2357 2358 #undef __FUNCT__ 2359 #define __FUNCT__ "MatMPISBAIJSetPreallocation" 2360 /*@C 2361 MatMPISBAIJSetPreallocation - For good matrix assembly performance 2362 the user should preallocate the matrix storage by setting the parameters 2363 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2364 performance can be increased by more than a factor of 50. 2365 2366 Collective on Mat 2367 2368 Input Parameters: 2369 + B - the matrix 2370 . bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row 2371 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs() 2372 . d_nz - number of block nonzeros per block row in diagonal portion of local 2373 submatrix (same for all local rows) 2374 . d_nnz - array containing the number of block nonzeros in the various block rows 2375 in the upper triangular and diagonal part of the in diagonal portion of the local 2376 (possibly different for each block row) or NULL. If you plan to factor the matrix you must leave room 2377 for the diagonal entry and set a value even if it is zero. 2378 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 2379 submatrix (same for all local rows). 2380 - o_nnz - array containing the number of nonzeros in the various block rows of the 2381 off-diagonal portion of the local submatrix that is right of the diagonal 2382 (possibly different for each block row) or NULL. 2383 2384 2385 Options Database Keys: 2386 . -mat_no_unroll - uses code that does not unroll the loops in the 2387 block calculations (much slower) 2388 . -mat_block_size - size of the blocks to use 2389 2390 Notes: 2391 2392 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 2393 than it must be used on all processors that share the object for that argument. 2394 2395 If the *_nnz parameter is given then the *_nz parameter is ignored 2396 2397 Storage Information: 2398 For a square global matrix we define each processor's diagonal portion 2399 to be its local rows and the corresponding columns (a square submatrix); 2400 each processor's off-diagonal portion encompasses the remainder of the 2401 local matrix (a rectangular submatrix). 2402 2403 The user can specify preallocated storage for the diagonal part of 2404 the local submatrix with either d_nz or d_nnz (not both). Set 2405 d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic 2406 memory allocation. Likewise, specify preallocated storage for the 2407 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 
2408 2409 You can call MatGetInfo() to get information on how effective the preallocation was; 2410 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 2411 You can also run with the option -info and look for messages with the string 2412 malloc in them to see if additional memory allocation was needed. 2413 2414 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 2415 the figure below we depict these three local rows and all columns (0-11). 2416 2417 .vb 2418 0 1 2 3 4 5 6 7 8 9 10 11 2419 -------------------------- 2420 row 3 |. . . d d d o o o o o o 2421 row 4 |. . . d d d o o o o o o 2422 row 5 |. . . d d d o o o o o o 2423 -------------------------- 2424 .ve 2425 2426 Thus, any entries in the d locations are stored in the d (diagonal) 2427 submatrix, and any entries in the o locations are stored in the 2428 o (off-diagonal) submatrix. Note that the d matrix is stored in 2429 MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format. 2430 2431 Now d_nz should indicate the number of block nonzeros per row in the upper triangular 2432 plus the diagonal part of the d matrix, 2433 and o_nz should indicate the number of block nonzeros per row in the o matrix 2434 2435 In general, for PDE problems in which most nonzeros are near the diagonal, 2436 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 2437 or you will get TERRIBLE performance; see the users' manual chapter on 2438 matrices. 2439 2440 Level: intermediate 2441 2442 .keywords: matrix, block, aij, compressed row, sparse, parallel 2443 2444 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateBAIJ(), PetscSplitOwnership() 2445 @*/ 2446 PetscErrorCode MatMPISBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 2447 { 2448 PetscErrorCode ierr; 2449 2450 PetscFunctionBegin; 2451 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 2452 PetscValidType(B,1); 2453 PetscValidLogicalCollectiveInt(B,bs,2); 2454 ierr = PetscTryMethod(B,"MatMPISBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,bs,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr); 2455 PetscFunctionReturn(0); 2456 } 2457 2458 #undef __FUNCT__ 2459 #define __FUNCT__ "MatCreateSBAIJ" 2460 /*@C 2461 MatCreateSBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format 2462 (block compressed row). For good matrix assembly performance 2463 the user should preallocate the matrix storage by setting the parameters 2464 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2465 performance can be increased by more than a factor of 50. 2466 2467 Collective on MPI_Comm 2468 2469 Input Parameters: 2470 + comm - MPI communicator 2471 . bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row 2472 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs() 2473 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 2474 This value should be the same as the local size used in creating the 2475 y vector for the matrix-vector product y = Ax. 2476 . n - number of local columns (or PETSC_DECIDE to have calculated if N is given) 2477 This value should be the same as the local size used in creating the 2478 x vector for the matrix-vector product y = Ax. 2479 . 
M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 2480 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 2481 . d_nz - number of block nonzeros per block row in diagonal portion of local 2482 submatrix (same for all local rows) 2483 . d_nnz - array containing the number of block nonzeros in the various block rows 2484 in the upper triangular portion of the diagonal portion of the local 2485 submatrix (possibly different for each block row) or NULL. 2486 If you plan to factor the matrix you must leave room for the diagonal entry and 2487 set its value even if it is zero. 2488 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 2489 submatrix (same for all local rows). 2490 - o_nnz - array containing the number of nonzeros in the various block rows of the 2491 off-diagonal portion of the local submatrix (possibly different for 2492 each block row) or NULL. 2493 2494 Output Parameter: 2495 . A - the matrix 2496 2497 Options Database Keys: 2498 . -mat_no_unroll - uses code that does not unroll the loops in the 2499 block calculations (much slower) 2500 . -mat_block_size - size of the blocks to use 2501 . -mat_mpi - use the parallel matrix data structures even on one processor 2502 (defaults to using SeqSBAIJ format on one processor) 2503 2504 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 2505 MatXXXXSetPreallocation() paradigm instead of this routine directly. 2506 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 2507 2508 Notes: 2509 The number of rows and columns must be divisible by blocksize. 2510 This matrix type does not support complex Hermitian operations. 2511 2512 The user MUST specify either the local or global matrix dimensions 2513 (possibly both). 2514 2515 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 2516 then it must be used on all processors that share the object for that argument. 2517 2518 If the *_nnz parameter is given then the *_nz parameter is ignored 2519 2520 Storage Information: 2521 For a square global matrix we define each processor's diagonal portion 2522 to be its local rows and the corresponding columns (a square submatrix); 2523 each processor's off-diagonal portion encompasses the remainder of the 2524 local matrix (a rectangular submatrix). 2525 2526 The user can specify preallocated storage for the diagonal part of 2527 the local submatrix with either d_nz or d_nnz (not both). Set 2528 d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic 2529 memory allocation. Likewise, specify preallocated storage for the 2530 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 2531 2532 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 2533 the figure below we depict these three local rows and all columns (0-11). 2534 2535 .vb 2536 0 1 2 3 4 5 6 7 8 9 10 11 2537 -------------------------- 2538 row 3 |. . . d d d o o o o o o 2539 row 4 |. . . d d d o o o o o o 2540 row 5 |. . . d d d o o o o o o 2541 -------------------------- 2542 .ve 2543 2544 Thus, any entries in the d locations are stored in the d (diagonal) 2545 submatrix, and any entries in the o locations are stored in the 2546 o (off-diagonal) submatrix. Note that the d matrix is stored in 2547 MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format.
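   As an illustration only (assuming bs = 1, that the three rows above belong to one process of a 12 by 12 matrix, and
   that every d and o location shown is a nonzero), the symmetric storage means d_nnz counts just the upper triangle
   of the d block while o_nnz counts all o locations:

.vb
   PetscInt d_nnz[3] = {3,2,1};   /* row 3: cols 3,4,5;  row 4: cols 4,5;  row 5: col 5 */
   PetscInt o_nnz[3] = {6,6,6};   /* cols 6-11 in each of rows 3,4,5 */
   ierr = MatCreateSBAIJ(comm,1,3,3,12,12,0,d_nnz,0,o_nnz,&A);CHKERRQ(ierr);
.ve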
2548 2549 Now d_nz should indicate the number of block nonzeros per row in the upper triangular 2550 plus the diagonal part of the d matrix, 2551 and o_nz should indicate the number of block nonzeros per row in the o matrix. 2552 In general, for PDE problems in which most nonzeros are near the diagonal, 2553 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 2554 or you will get TERRIBLE performance; see the users' manual chapter on 2555 matrices. 2556 2557 Level: intermediate 2558 2559 .keywords: matrix, block, aij, compressed row, sparse, parallel 2560 2561 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateBAIJ() 2562 @*/ 2563 2564 PetscErrorCode MatCreateSBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 2565 { 2566 PetscErrorCode ierr; 2567 PetscMPIInt size; 2568 2569 PetscFunctionBegin; 2570 ierr = MatCreate(comm,A);CHKERRQ(ierr); 2571 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 2572 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2573 if (size > 1) { 2574 ierr = MatSetType(*A,MATMPISBAIJ);CHKERRQ(ierr); 2575 ierr = MatMPISBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 2576 } else { 2577 ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr); 2578 ierr = MatSeqSBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr); 2579 } 2580 PetscFunctionReturn(0); 2581 } 2582 2583 2584 #undef __FUNCT__ 2585 #define __FUNCT__ "MatDuplicate_MPISBAIJ" 2586 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 2587 { 2588 Mat mat; 2589 Mat_MPISBAIJ *a,*oldmat = (Mat_MPISBAIJ*)matin->data; 2590 PetscErrorCode ierr; 2591 PetscInt len=0,nt,bs=matin->rmap->bs,mbs=oldmat->mbs; 2592 PetscScalar *array; 2593 2594 PetscFunctionBegin; 2595 *newmat = 0; 2596 2597 ierr = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr); 2598 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 2599 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 2600 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 2601 ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr); 2602 ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr); 2603 2604 mat->factortype = matin->factortype; 2605 mat->preallocated = PETSC_TRUE; 2606 mat->assembled = PETSC_TRUE; 2607 mat->insertmode = NOT_SET_VALUES; 2608 2609 a = (Mat_MPISBAIJ*)mat->data; 2610 a->bs2 = oldmat->bs2; 2611 a->mbs = oldmat->mbs; 2612 a->nbs = oldmat->nbs; 2613 a->Mbs = oldmat->Mbs; 2614 a->Nbs = oldmat->Nbs; 2615 2616 2617 a->size = oldmat->size; 2618 a->rank = oldmat->rank; 2619 a->donotstash = oldmat->donotstash; 2620 a->roworiented = oldmat->roworiented; 2621 a->rowindices = 0; 2622 a->rowvalues = 0; 2623 a->getrowactive = PETSC_FALSE; 2624 a->barray = 0; 2625 a->rstartbs = oldmat->rstartbs; 2626 a->rendbs = oldmat->rendbs; 2627 a->cstartbs = oldmat->cstartbs; 2628 a->cendbs = oldmat->cendbs; 2629 2630 /* hash table stuff */ 2631 a->ht = 0; 2632 a->hd = 0; 2633 a->ht_size = 0; 2634 a->ht_flag = oldmat->ht_flag; 2635 a->ht_fact = oldmat->ht_fact; 2636 a->ht_total_ct = 0; 2637 a->ht_insert_ct = 0; 2638 2639 ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+2)*sizeof(PetscInt));CHKERRQ(ierr); 2640 if (oldmat->colmap) { 2641 #if defined(PETSC_USE_CTABLE) 2642 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 2643 #else 2644 ierr = 
PetscMalloc1(a->Nbs,&a->colmap);CHKERRQ(ierr); 2645 ierr = PetscLogObjectMemory((PetscObject)mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr); 2646 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr); 2647 #endif 2648 } else a->colmap = 0; 2649 2650 if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) { 2651 ierr = PetscMalloc1(len,&a->garray);CHKERRQ(ierr); 2652 ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr); 2653 ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); 2654 } else a->garray = 0; 2655 2656 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)matin),matin->rmap->bs,&mat->bstash);CHKERRQ(ierr); 2657 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 2658 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr); 2659 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 2660 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr); 2661 2662 ierr = VecDuplicate(oldmat->slvec0,&a->slvec0);CHKERRQ(ierr); 2663 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0);CHKERRQ(ierr); 2664 ierr = VecDuplicate(oldmat->slvec1,&a->slvec1);CHKERRQ(ierr); 2665 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1);CHKERRQ(ierr); 2666 2667 ierr = VecGetLocalSize(a->slvec1,&nt);CHKERRQ(ierr); 2668 ierr = VecGetArray(a->slvec1,&array);CHKERRQ(ierr); 2669 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,bs*mbs,array,&a->slvec1a);CHKERRQ(ierr); 2670 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,nt-bs*mbs,array+bs*mbs,&a->slvec1b);CHKERRQ(ierr); 2671 ierr = VecRestoreArray(a->slvec1,&array);CHKERRQ(ierr); 2672 ierr = VecGetArray(a->slvec0,&array);CHKERRQ(ierr); 2673 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,nt-bs*mbs,array+bs*mbs,&a->slvec0b);CHKERRQ(ierr); 2674 ierr = VecRestoreArray(a->slvec0,&array);CHKERRQ(ierr); 2675 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0);CHKERRQ(ierr); 2676 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1);CHKERRQ(ierr); 2677 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0b);CHKERRQ(ierr); 2678 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1a);CHKERRQ(ierr); 2679 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1b);CHKERRQ(ierr); 2680 2681 /* ierr = VecScatterCopy(oldmat->sMvctx,&a->sMvctx); - not written yet, replaced by the lazy trick: */ 2682 ierr = PetscObjectReference((PetscObject)oldmat->sMvctx);CHKERRQ(ierr); 2683 a->sMvctx = oldmat->sMvctx; 2684 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->sMvctx);CHKERRQ(ierr); 2685 2686 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 2687 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr); 2688 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 2689 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr); 2690 ierr = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 2691 *newmat = mat; 2692 PetscFunctionReturn(0); 2693 } 2694 2695 #undef __FUNCT__ 2696 #define __FUNCT__ "MatLoad_MPISBAIJ" 2697 PetscErrorCode MatLoad_MPISBAIJ(Mat newmat,PetscViewer viewer) 2698 { 2699 PetscErrorCode ierr; 2700 PetscInt i,nz,j,rstart,rend; 2701 PetscScalar *vals,*buf; 2702 MPI_Comm comm; 2703 MPI_Status status; 2704 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag,*sndcounts = 
0,*browners,maxnz,*rowners,mmbs; 2705 PetscInt header[4],*rowlengths = 0,M,N,m,*cols,*locrowlens; 2706 PetscInt *procsnz = 0,jj,*mycols,*ibuf; 2707 PetscInt bs = newmat->rmap->bs,Mbs,mbs,extra_rows; 2708 PetscInt *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount; 2709 PetscInt dcount,kmax,k,nzcount,tmp; 2710 int fd; 2711 2712 PetscFunctionBegin; 2713 /* force binary viewer to load .info file if it has not yet done so */ 2714 ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr); 2715 ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr); 2716 ierr = PetscOptionsBegin(comm,NULL,"Options for loading MPISBAIJ matrix 2","Mat");CHKERRQ(ierr); 2717 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr); 2718 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2719 if (bs < 0) bs = 1; 2720 2721 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2722 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2723 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2724 if (!rank) { 2725 ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr); 2726 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 2727 if (header[3] < 0) SETERRQ(PetscObjectComm((PetscObject)newmat),PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as MPISBAIJ"); 2728 } 2729 2730 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 2731 M = header[1]; 2732 N = header[2]; 2733 2734 /* If global sizes are set, check if they are consistent with that given in the file */ 2735 if (newmat->rmap->N >= 0 && newmat->rmap->N != M) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%D) and input matrix has (%D)",newmat->rmap->N,M); 2736 if (newmat->cmap->N >= 0 && newmat->cmap->N != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%D) and input matrix has (%D)",newmat->cmap->N,N); 2737 2738 if (M != N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only do square matrices"); 2739 2740 /* 2741 This code adds extra rows to make sure the number of rows is 2742 divisible by the blocksize 2743 */ 2744 Mbs = M/bs; 2745 extra_rows = bs - M + bs*(Mbs); 2746 if (extra_rows == bs) extra_rows = 0; 2747 else Mbs++; 2748 if (extra_rows &&!rank) { 2749 ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr); 2750 } 2751 2752 /* determine ownership of all rows */ 2753 if (newmat->rmap->n < 0) { /* PETSC_DECIDE */ 2754 mbs = Mbs/size + ((Mbs % size) > rank); 2755 m = mbs*bs; 2756 } else { /* User Set */ 2757 m = newmat->rmap->n; 2758 mbs = m/bs; 2759 } 2760 ierr = PetscMalloc2(size+1,&rowners,size+1,&browners);CHKERRQ(ierr); 2761 ierr = PetscMPIIntCast(mbs,&mmbs);CHKERRQ(ierr); 2762 ierr = MPI_Allgather(&mmbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 2763 rowners[0] = 0; 2764 for (i=2; i<=size; i++) rowners[i] += rowners[i-1]; 2765 for (i=0; i<=size; i++) browners[i] = rowners[i]*bs; 2766 rstart = rowners[rank]; 2767 rend = rowners[rank+1]; 2768 2769 /* distribute row lengths to all processors */ 2770 ierr = PetscMalloc1((rend-rstart)*bs,&locrowlens);CHKERRQ(ierr); 2771 if (!rank) { 2772 ierr = PetscMalloc1(M+extra_rows,&rowlengths);CHKERRQ(ierr); 2773 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 2774 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 2775 ierr = PetscMalloc1(size,&sndcounts);CHKERRQ(ierr); 2776 for (i=0; i<size; i++) sndcounts[i] = 
browners[i+1] - browners[i]; 2777 ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr); 2778 ierr = PetscFree(sndcounts);CHKERRQ(ierr); 2779 } else { 2780 ierr = MPI_Scatterv(0,0,0,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr); 2781 } 2782 2783 if (!rank) { /* procs[0] */ 2784 /* calculate the number of nonzeros on each processor */ 2785 ierr = PetscMalloc1(size,&procsnz);CHKERRQ(ierr); 2786 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 2787 for (i=0; i<size; i++) { 2788 for (j=rowners[i]*bs; j< rowners[i+1]*bs; j++) { 2789 procsnz[i] += rowlengths[j]; 2790 } 2791 } 2792 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2793 2794 /* determine max buffer needed and allocate it */ 2795 maxnz = 0; 2796 for (i=0; i<size; i++) { 2797 maxnz = PetscMax(maxnz,procsnz[i]); 2798 } 2799 ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr); 2800 2801 /* read in my part of the matrix column indices */ 2802 nz = procsnz[0]; 2803 ierr = PetscMalloc1(nz,&ibuf);CHKERRQ(ierr); 2804 mycols = ibuf; 2805 if (size == 1) nz -= extra_rows; 2806 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 2807 if (size == 1) { 2808 for (i=0; i< extra_rows; i++) mycols[nz+i] = M+i; 2809 } 2810 2811 /* read in every ones (except the last) and ship off */ 2812 for (i=1; i<size-1; i++) { 2813 nz = procsnz[i]; 2814 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2815 ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2816 } 2817 /* read in the stuff for the last proc */ 2818 if (size != 1) { 2819 nz = procsnz[size-1] - extra_rows; /* the extra rows are not on the disk */ 2820 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2821 for (i=0; i<extra_rows; i++) cols[nz+i] = M+i; 2822 ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr); 2823 } 2824 ierr = PetscFree(cols);CHKERRQ(ierr); 2825 } else { /* procs[i], i>0 */ 2826 /* determine buffer space needed for message */ 2827 nz = 0; 2828 for (i=0; i<m; i++) nz += locrowlens[i]; 2829 ierr = PetscMalloc1(nz,&ibuf);CHKERRQ(ierr); 2830 mycols = ibuf; 2831 /* receive message of column indices*/ 2832 ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 2833 ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr); 2834 if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2835 } 2836 2837 /* loop over local rows, determining number of off diagonal entries */ 2838 ierr = PetscMalloc2(rend-rstart,&dlens,rend-rstart,&odlens);CHKERRQ(ierr); 2839 ierr = PetscMalloc3(Mbs,&mask,Mbs,&masked1,Mbs,&masked2);CHKERRQ(ierr); 2840 ierr = PetscMemzero(mask,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 2841 ierr = PetscMemzero(masked1,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 2842 ierr = PetscMemzero(masked2,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 2843 rowcount = 0; 2844 nzcount = 0; 2845 for (i=0; i<mbs; i++) { 2846 dcount = 0; 2847 odcount = 0; 2848 for (j=0; j<bs; j++) { 2849 kmax = locrowlens[rowcount]; 2850 for (k=0; k<kmax; k++) { 2851 tmp = mycols[nzcount++]/bs; /* block col. 
index */ 2852 if (!mask[tmp]) { 2853 mask[tmp] = 1; 2854 if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; /* entry in off-diag portion */ 2855 else masked1[dcount++] = tmp; /* entry in diag portion */ 2856 } 2857 } 2858 rowcount++; 2859 } 2860 2861 dlens[i] = dcount; /* d_nzz[i] */ 2862 odlens[i] = odcount; /* o_nzz[i] */ 2863 2864 /* zero out the mask elements we set */ 2865 for (j=0; j<dcount; j++) mask[masked1[j]] = 0; 2866 for (j=0; j<odcount; j++) mask[masked2[j]] = 0; 2867 } 2868 ierr = MatSetSizes(newmat,m,m,M+extra_rows,N+extra_rows);CHKERRQ(ierr); 2869 ierr = MatMPISBAIJSetPreallocation(newmat,bs,0,dlens,0,odlens);CHKERRQ(ierr); 2870 ierr = MatSetOption(newmat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE);CHKERRQ(ierr); 2871 2872 if (!rank) { 2873 ierr = PetscMalloc1(maxnz,&buf);CHKERRQ(ierr); 2874 /* read in my part of the matrix numerical values */ 2875 nz = procsnz[0]; 2876 vals = buf; 2877 mycols = ibuf; 2878 if (size == 1) nz -= extra_rows; 2879 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2880 if (size == 1) { 2881 for (i=0; i< extra_rows; i++) vals[nz+i] = 1.0; 2882 } 2883 2884 /* insert into matrix */ 2885 jj = rstart*bs; 2886 for (i=0; i<m; i++) { 2887 ierr = MatSetValues(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 2888 mycols += locrowlens[i]; 2889 vals += locrowlens[i]; 2890 jj++; 2891 } 2892 2893 /* read in other processors (except the last one) and ship out */ 2894 for (i=1; i<size-1; i++) { 2895 nz = procsnz[i]; 2896 vals = buf; 2897 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2898 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr); 2899 } 2900 /* the last proc */ 2901 if (size != 1) { 2902 nz = procsnz[i] - extra_rows; 2903 vals = buf; 2904 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2905 for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0; 2906 ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr); 2907 } 2908 ierr = PetscFree(procsnz);CHKERRQ(ierr); 2909 2910 } else { 2911 /* receive numeric values */ 2912 ierr = PetscMalloc1(nz,&buf);CHKERRQ(ierr); 2913 2914 /* receive message of values*/ 2915 vals = buf; 2916 mycols = ibuf; 2917 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm,&status);CHKERRQ(ierr); 2918 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 2919 if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2920 2921 /* insert into matrix */ 2922 jj = rstart*bs; 2923 for (i=0; i<m; i++) { 2924 ierr = MatSetValues_MPISBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 2925 mycols += locrowlens[i]; 2926 vals += locrowlens[i]; 2927 jj++; 2928 } 2929 } 2930 2931 ierr = PetscFree(locrowlens);CHKERRQ(ierr); 2932 ierr = PetscFree(buf);CHKERRQ(ierr); 2933 ierr = PetscFree(ibuf);CHKERRQ(ierr); 2934 ierr = PetscFree2(rowners,browners);CHKERRQ(ierr); 2935 ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr); 2936 ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr); 2937 ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2938 ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2939 PetscFunctionReturn(0); 2940 } 2941 2942 #undef __FUNCT__ 2943 #define __FUNCT__ "MatMPISBAIJSetHashTableFactor" 2944 /*XXXXX@ 2945 MatMPISBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable. 2946 2947 Input Parameters: 2948 . mat - the matrix 2949 . 
fact - factor 2950 2951 Not Collective on Mat, each process can have a different hash factor 2952 2953 Level: advanced 2954 2955 Notes: 2956 This can also be set by the command line option: -mat_use_hash_table fact 2957 2958 .keywords: matrix, hashtable, factor, HT 2959 2960 .seealso: MatSetOption() 2961 @XXXXX*/ 2962 2963 2964 #undef __FUNCT__ 2965 #define __FUNCT__ "MatGetRowMaxAbs_MPISBAIJ" 2966 PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat A,Vec v,PetscInt idx[]) 2967 { 2968 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 2969 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(a->B)->data; 2970 PetscReal atmp; 2971 PetscReal *work,*svalues,*rvalues; 2972 PetscErrorCode ierr; 2973 PetscInt i,bs,mbs,*bi,*bj,brow,j,ncols,krow,kcol,col,row,Mbs,bcol; 2974 PetscMPIInt rank,size; 2975 PetscInt *rowners_bs,dest,count,source; 2976 PetscScalar *va; 2977 MatScalar *ba; 2978 MPI_Status stat; 2979 2980 PetscFunctionBegin; 2981 if (idx) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Send email to petsc-maint@mcs.anl.gov"); 2982 ierr = MatGetRowMaxAbs(a->A,v,NULL);CHKERRQ(ierr); 2983 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2984 2985 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr); 2986 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr); 2987 2988 bs = A->rmap->bs; 2989 mbs = a->mbs; 2990 Mbs = a->Mbs; 2991 ba = b->a; 2992 bi = b->i; 2993 bj = b->j; 2994 2995 /* find ownerships */ 2996 rowners_bs = A->rmap->range; 2997 2998 /* each proc creates an array to be distributed */ 2999 ierr = PetscMalloc1(bs*Mbs,&work);CHKERRQ(ierr); 3000 ierr = PetscMemzero(work,bs*Mbs*sizeof(PetscReal));CHKERRQ(ierr); 3001 3002 /* row_max for B */ 3003 if (rank != size-1) { 3004 for (i=0; i<mbs; i++) { 3005 ncols = bi[1] - bi[0]; bi++; 3006 brow = bs*i; 3007 for (j=0; j<ncols; j++) { 3008 bcol = bs*(*bj); 3009 for (kcol=0; kcol<bs; kcol++) { 3010 col = bcol + kcol; /* local col index */ 3011 col += rowners_bs[rank+1]; /* global col index */ 3012 for (krow=0; krow<bs; krow++) { 3013 atmp = PetscAbsScalar(*ba); ba++; 3014 row = brow + krow; /* local row index */ 3015 if (PetscRealPart(va[row]) < atmp) va[row] = atmp; 3016 if (work[col] < atmp) work[col] = atmp; 3017 } 3018 } 3019 bj++; 3020 } 3021 } 3022 3023 /* send values to its owners */ 3024 for (dest=rank+1; dest<size; dest++) { 3025 svalues = work + rowners_bs[dest]; 3026 count = rowners_bs[dest+1]-rowners_bs[dest]; 3027 ierr = MPI_Send(svalues,count,MPIU_REAL,dest,rank,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 3028 } 3029 } 3030 3031 /* receive values */ 3032 if (rank) { 3033 rvalues = work; 3034 count = rowners_bs[rank+1]-rowners_bs[rank]; 3035 for (source=0; source<rank; source++) { 3036 ierr = MPI_Recv(rvalues,count,MPIU_REAL,MPI_ANY_SOURCE,MPI_ANY_TAG,PetscObjectComm((PetscObject)A),&stat);CHKERRQ(ierr); 3037 /* process values */ 3038 for (i=0; i<count; i++) { 3039 if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i]; 3040 } 3041 } 3042 } 3043 3044 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 3045 ierr = PetscFree(work);CHKERRQ(ierr); 3046 PetscFunctionReturn(0); 3047 } 3048 3049 #undef __FUNCT__ 3050 #define __FUNCT__ "MatSOR_MPISBAIJ" 3051 PetscErrorCode MatSOR_MPISBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 3052 { 3053 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data; 3054 PetscErrorCode ierr; 3055 PetscInt mbs=mat->mbs,bs=matin->rmap->bs; 3056 PetscScalar *x,*ptr,*from; 3057 Vec bb1; 3058 const PetscScalar *b; 3059 3060 PetscFunctionBegin; 3061 if (its <= 0 || lits <= 0) 
SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 3062 if (bs > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented"); 3063 3064 if (flag == SOR_APPLY_UPPER) { 3065 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 3066 PetscFunctionReturn(0); 3067 } 3068 3069 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 3070 if (flag & SOR_ZERO_INITIAL_GUESS) { 3071 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr); 3072 its--; 3073 } 3074 3075 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 3076 while (its--) { 3077 3078 /* lower triangular part: slvec0b = - B^T*xx */ 3079 ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr); 3080 3081 /* copy xx into slvec0a */ 3082 ierr = VecGetArray(mat->slvec0,&ptr);CHKERRQ(ierr); 3083 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 3084 ierr = PetscMemcpy(ptr,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 3085 ierr = VecRestoreArray(mat->slvec0,&ptr);CHKERRQ(ierr); 3086 3087 ierr = VecScale(mat->slvec0,-1.0);CHKERRQ(ierr); 3088 3089 /* copy bb into slvec1a */ 3090 ierr = VecGetArray(mat->slvec1,&ptr);CHKERRQ(ierr); 3091 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 3092 ierr = PetscMemcpy(ptr,b,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 3093 ierr = VecRestoreArray(mat->slvec1,&ptr);CHKERRQ(ierr); 3094 3095 /* set slvec1b = 0 */ 3096 ierr = VecSet(mat->slvec1b,0.0);CHKERRQ(ierr); 3097 3098 ierr = VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3099 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 3100 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 3101 ierr = VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3102 3103 /* upper triangular part: bb1 = bb1 - B*x */ 3104 ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,bb1);CHKERRQ(ierr); 3105 3106 /* local diagonal sweep */ 3107 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr); 3108 } 3109 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 3110 } else if ((flag & SOR_LOCAL_FORWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) { 3111 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 3112 } else if ((flag & SOR_LOCAL_BACKWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) { 3113 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 3114 } else if (flag & SOR_EISENSTAT) { 3115 Vec xx1; 3116 PetscBool hasop; 3117 const PetscScalar *diag; 3118 PetscScalar *sl,scale = (omega - 2.0)/omega; 3119 PetscInt i,n; 3120 3121 if (!mat->xx1) { 3122 ierr = VecDuplicate(bb,&mat->xx1);CHKERRQ(ierr); 3123 ierr = VecDuplicate(bb,&mat->bb1);CHKERRQ(ierr); 3124 } 3125 xx1 = mat->xx1; 3126 bb1 = mat->bb1; 3127 3128 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr); 3129 3130 if (!mat->diag) { 3131 /* this is wrong for same matrix with new nonzero values */ 3132 ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr); 3133 ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr); 3134 } 3135 ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr); 3136 3137 if (hasop) { 3138 ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr); 3139 ierr = VecAYPX(mat->slvec1a,scale,bb);CHKERRQ(ierr); 3140 } else { 3141 /* 3142 These 
  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
      its--;
    }

    ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
    while (its--) {

      /* lower triangular part: slvec0b = - B^T*xx */
      ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr);

      /* copy xx into slvec0a */
      ierr = VecGetArray(mat->slvec0,&ptr);CHKERRQ(ierr);
      ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
      ierr = PetscMemcpy(ptr,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
      ierr = VecRestoreArray(mat->slvec0,&ptr);CHKERRQ(ierr);

      ierr = VecScale(mat->slvec0,-1.0);CHKERRQ(ierr);

      /* copy bb into slvec1a */
      ierr = VecGetArray(mat->slvec1,&ptr);CHKERRQ(ierr);
      ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
      ierr = PetscMemcpy(ptr,b,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
      ierr = VecRestoreArray(mat->slvec1,&ptr);CHKERRQ(ierr);

      /* set slvec1b = 0 */
      ierr = VecSet(mat->slvec1b,0.0);CHKERRQ(ierr);

      ierr = VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
      ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
      ierr = VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      /* upper triangular part: bb1 = bb1 - B*x */
      ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,bb1);CHKERRQ(ierr);

      /* local diagonal sweep */
      ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
    }
    ierr = VecDestroy(&bb1);CHKERRQ(ierr);
  } else if ((flag & SOR_LOCAL_FORWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) {
    ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
  } else if ((flag & SOR_LOCAL_BACKWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) {
    ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
  } else if (flag & SOR_EISENSTAT) {
    Vec               xx1;
    PetscBool         hasop;
    const PetscScalar *diag;
    PetscScalar       *sl,scale = (omega - 2.0)/omega;
    PetscInt          i,n;

    if (!mat->xx1) {
      ierr = VecDuplicate(bb,&mat->xx1);CHKERRQ(ierr);
      ierr = VecDuplicate(bb,&mat->bb1);CHKERRQ(ierr);
    }
    xx1 = mat->xx1;
    bb1 = mat->bb1;

    ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);

    if (!mat->diag) {
      /* this is wrong for same matrix with new nonzero values */
      ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr);
      ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
    }
    ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);

    if (hasop) {
      ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
      ierr = VecAYPX(mat->slvec1a,scale,bb);CHKERRQ(ierr);
    } else {
      /*
         These two lines are replaced by code that may be a bit faster for a good compiler
           ierr = VecPointwiseMult(mat->slvec1a,mat->diag,xx);CHKERRQ(ierr);
           ierr = VecAYPX(mat->slvec1a,scale,bb);CHKERRQ(ierr);
      */
      ierr = VecGetArray(mat->slvec1a,&sl);CHKERRQ(ierr);
      ierr = VecGetArrayRead(mat->diag,&diag);CHKERRQ(ierr);
      ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
      ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
      ierr = VecGetLocalSize(xx,&n);CHKERRQ(ierr);
      if (omega == 1.0) {
        for (i=0; i<n; i++) sl[i] = b[i] - diag[i]*x[i];
        ierr = PetscLogFlops(2.0*n);CHKERRQ(ierr);
      } else {
        for (i=0; i<n; i++) sl[i] = b[i] + scale*diag[i]*x[i];
        ierr = PetscLogFlops(3.0*n);CHKERRQ(ierr);
      }
      ierr = VecRestoreArray(mat->slvec1a,&sl);CHKERRQ(ierr);
      ierr = VecRestoreArrayRead(mat->diag,&diag);CHKERRQ(ierr);
      ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
      ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
    }

    /* multiply off-diagonal portion of matrix */
    ierr = VecSet(mat->slvec1b,0.0);CHKERRQ(ierr);
    ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr);
    ierr = VecGetArray(mat->slvec0,&from);CHKERRQ(ierr);
    ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
    ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
    ierr = VecRestoreArray(mat->slvec0,&from);CHKERRQ(ierr);
    ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
    ierr = VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,mat->slvec1a);CHKERRQ(ierr);

    /* local sweep */
    ierr = (*mat->A->ops->sor)(mat->A,mat->slvec1a,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
    ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
  } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
  PetscFunctionReturn(0);
}
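/*
   Usage sketch (illustrative only, kept in a comment, not compiled): MatSOR() on a
   MATMPISBAIJ matrix is normally reached through PCSOR, but it may also be called
   directly; this implementation requires block size 1.  The objects A, b and x below
   are hypothetical.

     ierr = MatSOR(A,b,1.0,(MatSORType)(SOR_LOCAL_SYMMETRIC_SWEEP | SOR_ZERO_INITIAL_GUESS),0.0,1,1,x);CHKERRQ(ierr);
*/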
#undef __FUNCT__
#define __FUNCT__ "MatSOR_MPISBAIJ_2comm"
PetscErrorCode MatSOR_MPISBAIJ_2comm(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
{
  Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
  PetscErrorCode ierr;
  Vec            lvec1,bb1;

  PetscFunctionBegin;
  if (its <= 0 || lits <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
  if (matin->rmap->bs > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");

  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
      its--;
    }

    ierr = VecDuplicate(mat->lvec,&lvec1);CHKERRQ(ierr);
    ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
    while (its--) {
      ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      /* lower diagonal part: bb1 = bb - B^T*xx */
      ierr = (*mat->B->ops->multtranspose)(mat->B,xx,lvec1);CHKERRQ(ierr);
      ierr = VecScale(lvec1,-1.0);CHKERRQ(ierr);

      ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecCopy(bb,bb1);CHKERRQ(ierr);
      ierr = VecScatterBegin(mat->Mvctx,lvec1,bb1,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);

      /* upper diagonal part: bb1 = bb1 - B*x */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);

      ierr = VecScatterEnd(mat->Mvctx,lvec1,bb1,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);

      /* diagonal sweep */
      ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
    }
    ierr = VecDestroy(&lvec1);CHKERRQ(ierr);
    ierr = VecDestroy(&bb1);CHKERRQ(ierr);
  } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatCreateMPISBAIJWithArrays"
/*@
   MatCreateMPISBAIJWithArrays - creates an MPI SBAIJ matrix using arrays that contain the local rows
   in standard CSR format.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  bs - the block size, only a block size of 1 is supported
.  m - number of local rows (Cannot be PETSC_DECIDE)
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have
       it calculated if N is given). For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
.  i - row indices
.  j - column indices
-  a - matrix values

   Output Parameter:
.  mat - the matrix

   Level: intermediate

   Notes:
   The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
   thus you CANNOT change the matrix entries by changing the values of a[] after you have
   called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.

   The i and j indices are 0 based; the i array holds the offsets into the local j array at which each local row starts.

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
          MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
@*/
PetscErrorCode MatCreateMPISBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
  if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
  ierr = MatCreate(comm,mat);CHKERRQ(ierr);
  ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
  ierr = MatSetType(*mat,MATMPISBAIJ);CHKERRQ(ierr);
  ierr = MatMPISBAIJSetPreallocationCSR(*mat,bs,i,j,a);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
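/*
   Example sketch (illustrative only, kept in a comment, not compiled): assemble the upper
   triangle of a 3x3 symmetric tridiagonal matrix with block size 1.  The arrays describe
   the rows owned by the calling process, so as written this assumes a single MPI process;
   the names ii, jj, vv and C are hypothetical.

     Mat         C;
     PetscInt    ii[] = {0,2,4,5};                  (row offsets into jj and vv)
     PetscInt    jj[] = {0,1, 1,2, 2};               (global column indices, upper triangle only)
     PetscScalar vv[] = {2.0,-1.0, 2.0,-1.0, 2.0};

     ierr = MatCreateMPISBAIJWithArrays(PETSC_COMM_WORLD,1,3,3,PETSC_DETERMINE,PETSC_DETERMINE,ii,jj,vv,&C);CHKERRQ(ierr);
     ...
     ierr = MatDestroy(&C);CHKERRQ(ierr);
*/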
#undef __FUNCT__
#define __FUNCT__ "MatMPISBAIJSetPreallocationCSR"
/*@C
   MatMPISBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in SBAIJ format.

   Collective on MPI_Comm

   Input Parameters:
+  B - the matrix
.  bs - the block size
.  i - the indices into j for the start of each local row (starts with zero)
.  j - the column indices for each local row (starts with zero); these must be sorted for each row
-  v - optional values in the matrix

   Level: developer

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ
@*/
PetscErrorCode MatMPISBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscTryMethod(B,"MatMPISBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
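/*
   Usage sketch (illustrative only, kept in a comment, not compiled): this mirrors what
   MatCreateMPISBAIJWithArrays() above does internally.  The names comm, B, bs, m, n,
   ii, jj and vv below are hypothetical.

     ierr = MatCreate(comm,&B);CHKERRQ(ierr);
     ierr = MatSetSizes(B,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
     ierr = MatSetType(B,MATMPISBAIJ);CHKERRQ(ierr);
     ierr = MatMPISBAIJSetPreallocationCSR(B,bs,ii,jj,vv);CHKERRQ(ierr);
*/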
#undef __FUNCT__
#define __FUNCT__ "MatCreateMPIMatConcatenateSeqMat_MPISBAIJ"
PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPISBAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
{
  PetscErrorCode ierr;
  PetscInt       m,N,i,rstart,nnz,Ii,bs,cbs;
  PetscInt       *indx;
  PetscScalar    *values;

  PetscFunctionBegin;
  ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
  if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
    Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)inmat->data;
    PetscInt     *dnz,*onz,sum,bs,cbs,mbs,Nbs;
    PetscInt     *bindx,rmax=a->rmax,j;

    ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
    mbs  = m/bs; Nbs = N/cbs;
    if (n == PETSC_DECIDE) {
      ierr = PetscSplitOwnership(comm,&n,&Nbs);CHKERRQ(ierr);
    }
    /* Check sum(n) = Nbs */
    ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
    if (sum != Nbs) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %D",Nbs);

    ierr    = MPI_Scan(&mbs,&rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
    rstart -= mbs;

    ierr = PetscMalloc1(rmax,&bindx);CHKERRQ(ierr);
    ierr = MatPreallocateInitialize(comm,mbs,n,dnz,onz);CHKERRQ(ierr);
    ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);
    for (i=0; i<mbs; i++) {
      ierr = MatGetRow_SeqSBAIJ(inmat,i*bs,&nnz,&indx,NULL);CHKERRQ(ierr); /* non-blocked nnz and indx */
      nnz  = nnz/bs;
      for (j=0; j<nnz; j++) bindx[j] = indx[j*bs]/bs;
      ierr = MatPreallocateSet(i+rstart,nnz,bindx,dnz,onz);CHKERRQ(ierr);
      ierr = MatRestoreRow_SeqSBAIJ(inmat,i*bs,&nnz,&indx,NULL);CHKERRQ(ierr);
    }
    ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_FALSE);CHKERRQ(ierr);
    ierr = PetscFree(bindx);CHKERRQ(ierr);

    ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
    ierr = MatSetSizes(*outmat,m,n*bs,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
    ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
    ierr = MatSetType(*outmat,MATMPISBAIJ);CHKERRQ(ierr);
    ierr = MatMPISBAIJSetPreallocation(*outmat,bs,0,dnz,0,onz);CHKERRQ(ierr);
    ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
  }

  /* numeric phase */
  ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr);

  ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);
  for (i=0; i<m; i++) {
    ierr = MatGetRow_SeqSBAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
    Ii   = i + rstart;
    ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
    ierr = MatRestoreRow_SeqSBAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
  }
  ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_FALSE);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}