1 #define PETSCMAT_DLL 2 3 #include "src/mat/impls/baij/mpi/mpibaij.h" /*I "petscmat.h" I*/ 4 #include "mpisbaij.h" 5 #include "src/mat/impls/sbaij/seq/sbaij.h" 6 7 EXTERN PetscErrorCode MatSetUpMultiply_MPISBAIJ(Mat); 8 EXTERN PetscErrorCode MatSetUpMultiply_MPISBAIJ_2comm(Mat); 9 EXTERN PetscErrorCode DisAssemble_MPISBAIJ(Mat); 10 EXTERN PetscErrorCode MatIncreaseOverlap_MPISBAIJ(Mat,PetscInt,IS[],PetscInt); 11 EXTERN PetscErrorCode MatGetValues_SeqSBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],PetscScalar []); 12 EXTERN PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],PetscScalar []); 13 EXTERN PetscErrorCode MatSetValues_SeqSBAIJ(Mat,PetscInt,const PetscInt [],PetscInt,const PetscInt [],const PetscScalar [],InsertMode); 14 EXTERN PetscErrorCode MatSetValuesBlocked_SeqSBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode); 15 EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode); 16 EXTERN PetscErrorCode MatGetRow_SeqSBAIJ(Mat,PetscInt,PetscInt*,PetscInt**,PetscScalar**); 17 EXTERN PetscErrorCode MatRestoreRow_SeqSBAIJ(Mat,PetscInt,PetscInt*,PetscInt**,PetscScalar**); 18 EXTERN PetscErrorCode MatPrintHelp_SeqSBAIJ(Mat); 19 EXTERN PetscErrorCode MatZeroRows_SeqSBAIJ(Mat,IS,PetscScalar*); 20 EXTERN PetscErrorCode MatZeroRows_SeqBAIJ(Mat,IS,PetscScalar *); 21 EXTERN PetscErrorCode MatGetRowMax_MPISBAIJ(Mat,Vec); 22 EXTERN PetscErrorCode MatRelax_MPISBAIJ(Mat,Vec,PetscReal,MatSORType,PetscReal,PetscInt,PetscInt,Vec); 23 24 /* UGLY, ugly, ugly 25 When MatScalar == PetscScalar the function MatSetValuesBlocked_MPIBAIJ_MatScalar() does 26 not exist. Otherwise ..._MatScalar() takes matrix elements in single precision and 27 inserts them into the single precision data structure. The function MatSetValuesBlocked_MPIBAIJ() 28 converts the entries into single precision and then calls ..._MatScalar() to put them 29 into the single precision data structures. 30 */ 31 #if defined(PETSC_USE_MAT_SINGLE) 32 EXTERN PetscErrorCode MatSetValuesBlocked_SeqSBAIJ_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode); 33 EXTERN PetscErrorCode MatSetValues_MPISBAIJ_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode); 34 EXTERN PetscErrorCode MatSetValuesBlocked_MPISBAIJ_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode); 35 EXTERN PetscErrorCode MatSetValues_MPISBAIJ_HT_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode); 36 EXTERN PetscErrorCode MatSetValuesBlocked_MPISBAIJ_HT_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode); 37 #else 38 #define MatSetValuesBlocked_SeqSBAIJ_MatScalar MatSetValuesBlocked_SeqSBAIJ 39 #define MatSetValues_MPISBAIJ_MatScalar MatSetValues_MPISBAIJ 40 #define MatSetValuesBlocked_MPISBAIJ_MatScalar MatSetValuesBlocked_MPISBAIJ 41 #define MatSetValues_MPISBAIJ_HT_MatScalar MatSetValues_MPISBAIJ_HT 42 #define MatSetValuesBlocked_MPISBAIJ_HT_MatScalar MatSetValuesBlocked_MPISBAIJ_HT 43 #endif 44 45 EXTERN_C_BEGIN 46 #undef __FUNCT__ 47 #define __FUNCT__ "MatStoreValues_MPISBAIJ" 48 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPISBAIJ(Mat mat) 49 { 50 Mat_MPISBAIJ *aij = (Mat_MPISBAIJ *)mat->data; 51 PetscErrorCode ierr; 52 53 PetscFunctionBegin; 54 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 55 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 56 PetscFunctionReturn(0); 57 } 58 EXTERN_C_END 59 60 EXTERN_C_BEGIN 61 #undef __FUNCT__ 62 #define __FUNCT__ "MatRetrieveValues_MPISBAIJ" 63 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPISBAIJ(Mat mat) 64 { 65 Mat_MPISBAIJ *aij = (Mat_MPISBAIJ *)mat->data; 66 PetscErrorCode ierr; 67 68 PetscFunctionBegin; 69 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 70 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 71 PetscFunctionReturn(0); 72 } 73 EXTERN_C_END 74 75 76 #define CHUNKSIZE 10 77 78 #define MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv) \ 79 { \ 80 \ 81 brow = row/bs; \ 82 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 83 rmax = aimax[brow]; nrow = ailen[brow]; \ 84 bcol = col/bs; \ 85 ridx = row % bs; cidx = col % bs; \ 86 low = 0; high = nrow; \ 87 while (high-low > 3) { \ 88 t = (low+high)/2; \ 89 if (rp[t] > bcol) high = t; \ 90 else low = t; \ 91 } \ 92 for (_i=low; _i<high; _i++) { \ 93 if (rp[_i] > bcol) break; \ 94 if (rp[_i] == bcol) { \ 95 bap = ap + bs2*_i + bs*cidx + ridx; \ 96 if (addv == ADD_VALUES) *bap += value; \ 97 else *bap = value; \ 98 goto a_noinsert; \ 99 } \ 100 } \ 101 if (a->nonew == 1) goto a_noinsert; \ 102 if (a->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 103 MatSeqXAIJReallocateAIJ(a,bs2,nrow,brow,bcol,rmax,aa,ai,aj,a->mbs,rp,ap,aimax,a->nonew); \ 104 N = nrow++ - 1; \ 105 /* shift up all the later entries in this row */ \ 106 for (ii=N; ii>=_i; ii--) { \ 107 rp[ii+1] = rp[ii]; \ 108 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 109 } \ 110 if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); } \ 111 rp[_i] = bcol; \ 112 ap[bs2*_i + bs*cidx + ridx] = value; \ 113 a_noinsert:; \ 114 ailen[brow] = nrow; \ 115 } 116 #ifndef MatSetValues_SeqBAIJ_B_Private 117 #define MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv) \ 118 { \ 119 brow = row/bs; \ 120 rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 121 rmax = bimax[brow]; nrow = bilen[brow]; \ 122 bcol = col/bs; \ 123 ridx = row % bs; cidx = col % bs; \ 124 low = 0; high = nrow; \ 125 while (high-low > 3) { \ 126 t = (low+high)/2; \ 127 if (rp[t] > bcol) high = t; \ 128 else low = t; \ 129 } \ 130 for (_i=low; _i<high; _i++) { \ 131 if (rp[_i] > bcol) break; \ 132 if (rp[_i] == bcol) { \ 133 bap = ap + bs2*_i + bs*cidx + ridx; \ 134 if (addv == ADD_VALUES) *bap += value; \ 135 else *bap = value; \ 136 goto b_noinsert; \ 137 } \ 138 } \ 139 if (b->nonew == 1) goto b_noinsert; \ 140 if (b->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 141 MatSeqXAIJReallocateAIJ(b,bs2,nrow,brow,bcol,rmax,ba,bi,bj,b->mbs,rp,ap,bimax,b->nonew); \ 142 N = nrow++ - 1; \ 143 /* shift up all the later entries in this row */ \ 144 for (ii=N; ii>=_i; ii--) { \ 145 rp[ii+1] = rp[ii]; \ 146 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 147 } \ 148 if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);} \ 149 rp[_i] = bcol; \ 150 ap[bs2*_i + bs*cidx + ridx] = value; \ 151 b_noinsert:; \ 152 bilen[brow] = nrow; \ 153 } 154 #endif 155 156 #if defined(PETSC_USE_MAT_SINGLE) 157 #undef __FUNCT__ 158 #define __FUNCT__ "MatSetValues_MPISBAIJ" 159 PetscErrorCode MatSetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 160 { 161 Mat_MPISBAIJ *b = (Mat_MPISBAIJ*)mat->data; 162 PetscErrorCode ierr; 163 PetscInt i,N = m*n; 164 MatScalar *vsingle; 165 166 PetscFunctionBegin; 167 if (N > b->setvalueslen) { 168 if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);} 169 ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr); 170 b->setvalueslen = N; 171 } 172 vsingle = b->setvaluescopy; 173 174 for (i=0; i<N; i++) { 175 vsingle[i] = v[i]; 176 } 177 ierr = MatSetValues_MPISBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr); 178 PetscFunctionReturn(0); 179 } 180 181 #undef __FUNCT__ 182 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ" 183 PetscErrorCode MatSetValuesBlocked_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 184 { 185 Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data; 186 PetscErrorCode ierr; 187 PetscInt i,N = m*n*b->bs2; 188 MatScalar *vsingle; 189 190 PetscFunctionBegin; 191 if (N > b->setvalueslen) { 192 if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);} 193 ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr); 194 b->setvalueslen = N; 195 } 196 vsingle = b->setvaluescopy; 197 for (i=0; i<N; i++) { 198 vsingle[i] = v[i]; 199 } 200 ierr = MatSetValuesBlocked_MPISBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr); 201 PetscFunctionReturn(0); 202 } 203 #endif 204 205 /* Only add/insert a(i,j) with i<=j (blocks). 206 Any a(i,j) with i>j input by user is ingored. 207 */ 208 #undef __FUNCT__ 209 #define __FUNCT__ "MatSetValues_MPIBAIJ_MatScalar" 210 PetscErrorCode MatSetValues_MPISBAIJ_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv) 211 { 212 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 213 MatScalar value; 214 PetscTruth roworiented = baij->roworiented; 215 PetscErrorCode ierr; 216 PetscInt i,j,row,col; 217 PetscInt rstart_orig=baij->rstart_bs; 218 PetscInt rend_orig=baij->rend_bs,cstart_orig=baij->cstart_bs; 219 PetscInt cend_orig=baij->cend_bs,bs=mat->bs; 220 221 /* Some Variables required in the macro */ 222 Mat A = baij->A; 223 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)(A)->data; 224 PetscInt *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j; 225 MatScalar *aa=a->a; 226 227 Mat B = baij->B; 228 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(B)->data; 229 PetscInt *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j; 230 MatScalar *ba=b->a; 231 232 PetscInt *rp,ii,nrow,_i,rmax,N,brow,bcol; 233 PetscInt low,high,t,ridx,cidx,bs2=a->bs2; 234 MatScalar *ap,*bap; 235 236 /* for stash */ 237 PetscInt n_loc, *in_loc=0; 238 MatScalar *v_loc=0; 239 240 PetscFunctionBegin; 241 242 if(!baij->donotstash){ 243 ierr = PetscMalloc(n*sizeof(PetscInt),&in_loc);CHKERRQ(ierr); 244 ierr = PetscMalloc(n*sizeof(MatScalar),&v_loc);CHKERRQ(ierr); 245 } 246 247 for (i=0; i<m; i++) { 248 if (im[i] < 0) continue; 249 #if defined(PETSC_USE_DEBUG) 250 if (im[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->M-1); 251 #endif 252 if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */ 253 row = im[i] - rstart_orig; /* local row index */ 254 for (j=0; j<n; j++) { 255 if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */ 256 if (in[j] >= cstart_orig && in[j] < cend_orig){ /* diag entry (A) */ 257 col = in[j] - cstart_orig; /* local col index */ 258 brow = row/bs; bcol = col/bs; 259 if (brow > bcol) continue; /* ignore lower triangular blocks of A */ 260 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 261 MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv); 262 /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 263 } else if (in[j] < 0) continue; 264 #if defined(PETSC_USE_DEBUG) 265 else if (in[j] >= mat->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->N-1);} 266 #endif 267 else { /* off-diag entry (B) */ 268 if (mat->was_assembled) { 269 if (!baij->colmap) { 270 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 271 } 272 #if defined (PETSC_USE_CTABLE) 273 ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr); 274 col = col - 1; 275 #else 276 col = baij->colmap[in[j]/bs] - 1; 277 #endif 278 if (col < 0 && !((Mat_SeqSBAIJ*)(baij->A->data))->nonew) { 279 ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr); 280 col = in[j]; 281 /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */ 282 B = baij->B; 283 b = (Mat_SeqBAIJ*)(B)->data; 284 bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j; 285 ba=b->a; 286 } else col += in[j]%bs; 287 } else col = in[j]; 288 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 289 MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv); 290 /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 291 } 292 } 293 } else { /* off processor entry */ 294 if (!baij->donotstash) { 295 n_loc = 0; 296 for (j=0; j<n; j++){ 297 if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */ 298 in_loc[n_loc] = in[j]; 299 if (roworiented) { 300 v_loc[n_loc] = v[i*n+j]; 301 } else { 302 v_loc[n_loc] = v[j*m+i]; 303 } 304 n_loc++; 305 } 306 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n_loc,in_loc,v_loc);CHKERRQ(ierr); 307 } 308 } 309 } 310 311 if(!baij->donotstash){ 312 ierr = PetscFree(in_loc);CHKERRQ(ierr); 313 ierr = PetscFree(v_loc);CHKERRQ(ierr); 314 } 315 PetscFunctionReturn(0); 316 } 317 318 #undef __FUNCT__ 319 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ_MatScalar" 320 PetscErrorCode MatSetValuesBlocked_MPISBAIJ_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv) 321 { 322 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 323 const MatScalar *value; 324 MatScalar *barray=baij->barray; 325 PetscTruth roworiented = baij->roworiented; 326 PetscErrorCode ierr; 327 PetscInt i,j,ii,jj,row,col,rstart=baij->rstart; 328 PetscInt rend=baij->rend,cstart=baij->cstart,stepval; 329 PetscInt cend=baij->cend,bs=mat->bs,bs2=baij->bs2; 330 331 PetscFunctionBegin; 332 if(!barray) { 333 ierr = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr); 334 baij->barray = barray; 335 } 336 337 if (roworiented) { 338 stepval = (n-1)*bs; 339 } else { 340 stepval = (m-1)*bs; 341 } 342 for (i=0; i<m; i++) { 343 if (im[i] < 0) continue; 344 #if defined(PETSC_USE_DEBUG) 345 if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1); 346 #endif 347 if (im[i] >= rstart && im[i] < rend) { 348 row = im[i] - rstart; 349 for (j=0; j<n; j++) { 350 /* If NumCol = 1 then a copy is not required */ 351 if ((roworiented) && (n == 1)) { 352 barray = (MatScalar*) v + i*bs2; 353 } else if((!roworiented) && (m == 1)) { 354 barray = (MatScalar*) v + j*bs2; 355 } else { /* Here a copy is required */ 356 if (roworiented) { 357 value = v + i*(stepval+bs)*bs + j*bs; 358 } else { 359 value = v + j*(stepval+bs)*bs + i*bs; 360 } 361 for (ii=0; ii<bs; ii++,value+=stepval) { 362 for (jj=0; jj<bs; jj++) { 363 *barray++ = *value++; 364 } 365 } 366 barray -=bs2; 367 } 368 369 if (in[j] >= cstart && in[j] < cend){ 370 col = in[j] - cstart; 371 ierr = MatSetValuesBlocked_SeqSBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 372 } 373 else if (in[j] < 0) continue; 374 #if defined(PETSC_USE_DEBUG) 375 else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);} 376 #endif 377 else { 378 if (mat->was_assembled) { 379 if (!baij->colmap) { 380 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 381 } 382 383 #if defined(PETSC_USE_DEBUG) 384 #if defined (PETSC_USE_CTABLE) 385 { PetscInt data; 386 ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr); 387 if ((data - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap"); 388 } 389 #else 390 if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap"); 391 #endif 392 #endif 393 #if defined (PETSC_USE_CTABLE) 394 ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr); 395 col = (col - 1)/bs; 396 #else 397 col = (baij->colmap[in[j]] - 1)/bs; 398 #endif 399 if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 400 ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr); 401 col = in[j]; 402 } 403 } 404 else col = in[j]; 405 ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 406 } 407 } 408 } else { 409 if (!baij->donotstash) { 410 if (roworiented) { 411 ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 412 } else { 413 ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 414 } 415 } 416 } 417 } 418 PetscFunctionReturn(0); 419 } 420 421 #undef __FUNCT__ 422 #define __FUNCT__ "MatGetValues_MPISBAIJ" 423 PetscErrorCode MatGetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 424 { 425 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 426 PetscErrorCode ierr; 427 PetscInt bs=mat->bs,i,j,bsrstart = baij->rstart*bs,bsrend = baij->rend*bs; 428 PetscInt bscstart = baij->cstart*bs,bscend = baij->cend*bs,row,col,data; 429 430 PetscFunctionBegin; 431 for (i=0; i<m; i++) { 432 if (idxm[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); 433 if (idxm[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->M-1); 434 if (idxm[i] >= bsrstart && idxm[i] < bsrend) { 435 row = idxm[i] - bsrstart; 436 for (j=0; j<n; j++) { 437 if (idxn[j] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column %D",idxn[j]); 438 if (idxn[j] >= mat->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->N-1); 439 if (idxn[j] >= bscstart && idxn[j] < bscend){ 440 col = idxn[j] - bscstart; 441 ierr = MatGetValues_SeqSBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 442 } else { 443 if (!baij->colmap) { 444 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 445 } 446 #if defined (PETSC_USE_CTABLE) 447 ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr); 448 data --; 449 #else 450 data = baij->colmap[idxn[j]/bs]-1; 451 #endif 452 if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0; 453 else { 454 col = data + idxn[j]%bs; 455 ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 456 } 457 } 458 } 459 } else { 460 SETERRQ(PETSC_ERR_SUP,"Only local values currently supported"); 461 } 462 } 463 PetscFunctionReturn(0); 464 } 465 466 #undef __FUNCT__ 467 #define __FUNCT__ "MatNorm_MPISBAIJ" 468 PetscErrorCode MatNorm_MPISBAIJ(Mat mat,NormType type,PetscReal *norm) 469 { 470 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 471 PetscErrorCode ierr; 472 PetscReal sum[2],*lnorm2; 473 474 PetscFunctionBegin; 475 if (baij->size == 1) { 476 ierr = MatNorm(baij->A,type,norm);CHKERRQ(ierr); 477 } else { 478 if (type == NORM_FROBENIUS) { 479 ierr = PetscMalloc(2*sizeof(PetscReal),&lnorm2);CHKERRQ(ierr); 480 ierr = MatNorm(baij->A,type,lnorm2);CHKERRQ(ierr); 481 *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2++; /* squar power of norm(A) */ 482 ierr = MatNorm(baij->B,type,lnorm2);CHKERRQ(ierr); 483 *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2--; /* squar power of norm(B) */ 484 ierr = MPI_Allreduce(lnorm2,&sum,2,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr); 485 *norm = sqrt(sum[0] + 2*sum[1]); 486 ierr = PetscFree(lnorm2);CHKERRQ(ierr); 487 } else if (type == NORM_INFINITY || type == NORM_1) { /* max row/column sum */ 488 Mat_SeqSBAIJ *amat=(Mat_SeqSBAIJ*)baij->A->data; 489 Mat_SeqBAIJ *bmat=(Mat_SeqBAIJ*)baij->B->data; 490 PetscReal *rsum,*rsum2,vabs; 491 PetscInt *jj,*garray=baij->garray,rstart=baij->rstart,nz; 492 PetscInt brow,bcol,col,bs=baij->A->bs,row,grow,gcol,mbs=amat->mbs; 493 MatScalar *v; 494 495 ierr = PetscMalloc((2*mat->N+1)*sizeof(PetscReal),&rsum);CHKERRQ(ierr); 496 rsum2 = rsum + mat->N; 497 ierr = PetscMemzero(rsum,mat->N*sizeof(PetscReal));CHKERRQ(ierr); 498 /* Amat */ 499 v = amat->a; jj = amat->j; 500 for (brow=0; brow<mbs; brow++) { 501 grow = bs*(rstart + brow); 502 nz = amat->i[brow+1] - amat->i[brow]; 503 for (bcol=0; bcol<nz; bcol++){ 504 gcol = bs*(rstart + *jj); jj++; 505 for (col=0; col<bs; col++){ 506 for (row=0; row<bs; row++){ 507 vabs = PetscAbsScalar(*v); v++; 508 rsum[gcol+col] += vabs; 509 /* non-diagonal block */ 510 if (bcol > 0 && vabs > 0.0) rsum[grow+row] += vabs; 511 } 512 } 513 } 514 } 515 /* Bmat */ 516 v = bmat->a; jj = bmat->j; 517 for (brow=0; brow<mbs; brow++) { 518 grow = bs*(rstart + brow); 519 nz = bmat->i[brow+1] - bmat->i[brow]; 520 for (bcol=0; bcol<nz; bcol++){ 521 gcol = bs*garray[*jj]; jj++; 522 for (col=0; col<bs; col++){ 523 for (row=0; row<bs; row++){ 524 vabs = PetscAbsScalar(*v); v++; 525 rsum[gcol+col] += vabs; 526 rsum[grow+row] += vabs; 527 } 528 } 529 } 530 } 531 ierr = MPI_Allreduce(rsum,rsum2,mat->N,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr); 532 *norm = 0.0; 533 for (col=0; col<mat->N; col++) { 534 if (rsum2[col] > *norm) *norm = rsum2[col]; 535 } 536 ierr = PetscFree(rsum);CHKERRQ(ierr); 537 } else { 538 SETERRQ(PETSC_ERR_SUP,"No support for this norm yet"); 539 } 540 } 541 PetscFunctionReturn(0); 542 } 543 544 #undef __FUNCT__ 545 #define __FUNCT__ "MatAssemblyBegin_MPISBAIJ" 546 PetscErrorCode MatAssemblyBegin_MPISBAIJ(Mat mat,MatAssemblyType mode) 547 { 548 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 549 PetscErrorCode ierr; 550 PetscInt nstash,reallocs; 551 InsertMode addv; 552 553 PetscFunctionBegin; 554 if (baij->donotstash) { 555 PetscFunctionReturn(0); 556 } 557 558 /* make sure all processors are either in INSERTMODE or ADDMODE */ 559 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr); 560 if (addv == (ADD_VALUES|INSERT_VALUES)) { 561 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 562 } 563 mat->insertmode = addv; /* in case this processor had no cache */ 564 565 ierr = MatStashScatterBegin_Private(&mat->stash,baij->rowners_bs);CHKERRQ(ierr); 566 ierr = MatStashScatterBegin_Private(&mat->bstash,baij->rowners);CHKERRQ(ierr); 567 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 568 ierr = PetscLogInfo((0,"MatAssemblyBegin_MPISBAIJ:Stash has %D entries,uses %D mallocs.\n",nstash,reallocs));CHKERRQ(ierr); 569 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 570 ierr = PetscLogInfo((0,"MatAssemblyBegin_MPISBAIJ:Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs));CHKERRQ(ierr); 571 PetscFunctionReturn(0); 572 } 573 574 #undef __FUNCT__ 575 #define __FUNCT__ "MatAssemblyEnd_MPISBAIJ" 576 PetscErrorCode MatAssemblyEnd_MPISBAIJ(Mat mat,MatAssemblyType mode) 577 { 578 Mat_MPISBAIJ *baij=(Mat_MPISBAIJ*)mat->data; 579 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)baij->A->data; 580 Mat_SeqBAIJ *b=(Mat_SeqBAIJ*)baij->B->data; 581 PetscErrorCode ierr; 582 PetscInt i,j,rstart,ncols,flg,bs2=baij->bs2; 583 PetscInt *row,*col,other_disassembled; 584 PetscMPIInt n; 585 PetscTruth r1,r2,r3; 586 MatScalar *val; 587 InsertMode addv = mat->insertmode; 588 589 PetscFunctionBegin; 590 591 if (!baij->donotstash) { 592 while (1) { 593 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 594 if (!flg) break; 595 596 for (i=0; i<n;) { 597 /* Now identify the consecutive vals belonging to the same row */ 598 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 599 if (j < n) ncols = j-i; 600 else ncols = n-i; 601 /* Now assemble all these values with a single function call */ 602 ierr = MatSetValues_MPISBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 603 i = j; 604 } 605 } 606 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 607 /* Now process the block-stash. Since the values are stashed column-oriented, 608 set the roworiented flag to column oriented, and after MatSetValues() 609 restore the original flags */ 610 r1 = baij->roworiented; 611 r2 = a->roworiented; 612 r3 = b->roworiented; 613 baij->roworiented = PETSC_FALSE; 614 a->roworiented = PETSC_FALSE; 615 b->roworiented = PETSC_FALSE; 616 while (1) { 617 ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 618 if (!flg) break; 619 620 for (i=0; i<n;) { 621 /* Now identify the consecutive vals belonging to the same row */ 622 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 623 if (j < n) ncols = j-i; 624 else ncols = n-i; 625 ierr = MatSetValuesBlocked_MPISBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr); 626 i = j; 627 } 628 } 629 ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr); 630 baij->roworiented = r1; 631 a->roworiented = r2; 632 b->roworiented = r3; 633 } 634 635 ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr); 636 ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr); 637 638 /* determine if any processor has disassembled, if so we must 639 also disassemble ourselfs, in order that we may reassemble. */ 640 /* 641 if nonzero structure of submatrix B cannot change then we know that 642 no processor disassembled thus we can skip this stuff 643 */ 644 if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) { 645 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr); 646 if (mat->was_assembled && !other_disassembled) { 647 ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr); 648 } 649 } 650 651 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 652 ierr = MatSetUpMultiply_MPISBAIJ(mat);CHKERRQ(ierr); /* setup Mvctx and sMvctx */ 653 } 654 b->compressedrow.use = PETSC_TRUE; 655 ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr); 656 ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr); 657 658 if (baij->rowvalues) { 659 ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr); 660 baij->rowvalues = 0; 661 } 662 663 PetscFunctionReturn(0); 664 } 665 666 #undef __FUNCT__ 667 #define __FUNCT__ "MatView_MPISBAIJ_ASCIIorDraworSocket" 668 static PetscErrorCode MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 669 { 670 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 671 PetscErrorCode ierr; 672 PetscInt bs = mat->bs; 673 PetscMPIInt size = baij->size,rank = baij->rank; 674 PetscTruth iascii,isdraw; 675 PetscViewer sviewer; 676 PetscViewerFormat format; 677 678 PetscFunctionBegin; 679 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 680 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 681 if (iascii) { 682 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 683 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 684 MatInfo info; 685 ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr); 686 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 687 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n", 688 rank,mat->m,(PetscInt)info.nz_used*bs,(PetscInt)info.nz_allocated*bs, 689 mat->bs,(PetscInt)info.memory);CHKERRQ(ierr); 690 ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 691 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr); 692 ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 693 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr); 694 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 695 ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr); 696 PetscFunctionReturn(0); 697 } else if (format == PETSC_VIEWER_ASCII_INFO) { 698 ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr); 699 PetscFunctionReturn(0); 700 } 701 } 702 703 if (isdraw) { 704 PetscDraw draw; 705 PetscTruth isnull; 706 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 707 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 708 } 709 710 if (size == 1) { 711 ierr = PetscObjectSetName((PetscObject)baij->A,mat->name);CHKERRQ(ierr); 712 ierr = MatView(baij->A,viewer);CHKERRQ(ierr); 713 } else { 714 /* assemble the entire matrix onto first processor. */ 715 Mat A; 716 Mat_SeqSBAIJ *Aloc; 717 Mat_SeqBAIJ *Bloc; 718 PetscInt M = mat->M,N = mat->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs; 719 MatScalar *a; 720 721 /* Should this be the same type as mat? */ 722 ierr = MatCreate(mat->comm,&A);CHKERRQ(ierr); 723 if (!rank) { 724 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 725 } else { 726 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 727 } 728 ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr); 729 ierr = MatMPISBAIJSetPreallocation(A,mat->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 730 ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr); 731 732 /* copy over the A part */ 733 Aloc = (Mat_SeqSBAIJ*)baij->A->data; 734 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 735 ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr); 736 737 for (i=0; i<mbs; i++) { 738 rvals[0] = bs*(baij->rstart + i); 739 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 740 for (j=ai[i]; j<ai[i+1]; j++) { 741 col = (baij->cstart+aj[j])*bs; 742 for (k=0; k<bs; k++) { 743 ierr = MatSetValues_MPISBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 744 col++; a += bs; 745 } 746 } 747 } 748 /* copy over the B part */ 749 Bloc = (Mat_SeqBAIJ*)baij->B->data; 750 ai = Bloc->i; aj = Bloc->j; a = Bloc->a; 751 for (i=0; i<mbs; i++) { 752 rvals[0] = bs*(baij->rstart + i); 753 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 754 for (j=ai[i]; j<ai[i+1]; j++) { 755 col = baij->garray[aj[j]]*bs; 756 for (k=0; k<bs; k++) { 757 ierr = MatSetValues_MPISBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 758 col++; a += bs; 759 } 760 } 761 } 762 ierr = PetscFree(rvals);CHKERRQ(ierr); 763 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 764 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 765 /* 766 Everyone has to call to draw the matrix since the graphics waits are 767 synchronized across all processors that share the PetscDraw object 768 */ 769 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 770 if (!rank) { 771 ierr = PetscObjectSetName((PetscObject)((Mat_MPISBAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr); 772 ierr = MatView(((Mat_MPISBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 773 } 774 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 775 ierr = MatDestroy(A);CHKERRQ(ierr); 776 } 777 PetscFunctionReturn(0); 778 } 779 780 #undef __FUNCT__ 781 #define __FUNCT__ "MatView_MPISBAIJ" 782 PetscErrorCode MatView_MPISBAIJ(Mat mat,PetscViewer viewer) 783 { 784 PetscErrorCode ierr; 785 PetscTruth iascii,isdraw,issocket,isbinary; 786 787 PetscFunctionBegin; 788 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 789 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 790 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr); 791 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 792 if (iascii || isdraw || issocket || isbinary) { 793 ierr = MatView_MPISBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 794 } else { 795 SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPISBAIJ matrices",((PetscObject)viewer)->type_name); 796 } 797 PetscFunctionReturn(0); 798 } 799 800 #undef __FUNCT__ 801 #define __FUNCT__ "MatDestroy_MPISBAIJ" 802 PetscErrorCode MatDestroy_MPISBAIJ(Mat mat) 803 { 804 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 805 PetscErrorCode ierr; 806 807 PetscFunctionBegin; 808 #if defined(PETSC_USE_LOG) 809 PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->M,mat->N); 810 #endif 811 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 812 ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr); 813 ierr = PetscFree(baij->rowners);CHKERRQ(ierr); 814 ierr = MatDestroy(baij->A);CHKERRQ(ierr); 815 ierr = MatDestroy(baij->B);CHKERRQ(ierr); 816 #if defined (PETSC_USE_CTABLE) 817 if (baij->colmap) {ierr = PetscTableDelete(baij->colmap);CHKERRQ(ierr);} 818 #else 819 if (baij->colmap) {ierr = PetscFree(baij->colmap);CHKERRQ(ierr);} 820 #endif 821 if (baij->garray) {ierr = PetscFree(baij->garray);CHKERRQ(ierr);} 822 if (baij->lvec) {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);} 823 if (baij->Mvctx) {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);} 824 if (baij->slvec0) { 825 ierr = VecDestroy(baij->slvec0);CHKERRQ(ierr); 826 ierr = VecDestroy(baij->slvec0b);CHKERRQ(ierr); 827 } 828 if (baij->slvec1) { 829 ierr = VecDestroy(baij->slvec1);CHKERRQ(ierr); 830 ierr = VecDestroy(baij->slvec1a);CHKERRQ(ierr); 831 ierr = VecDestroy(baij->slvec1b);CHKERRQ(ierr); 832 } 833 if (baij->sMvctx) {ierr = VecScatterDestroy(baij->sMvctx);CHKERRQ(ierr);} 834 if (baij->rowvalues) {ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);} 835 if (baij->barray) {ierr = PetscFree(baij->barray);CHKERRQ(ierr);} 836 if (baij->hd) {ierr = PetscFree(baij->hd);CHKERRQ(ierr);} 837 #if defined(PETSC_USE_MAT_SINGLE) 838 if (baij->setvaluescopy) {ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr);} 839 #endif 840 ierr = PetscFree(baij);CHKERRQ(ierr); 841 842 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr); 843 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr); 844 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr); 845 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPISBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 846 PetscFunctionReturn(0); 847 } 848 849 #undef __FUNCT__ 850 #define __FUNCT__ "MatMult_MPISBAIJ" 851 PetscErrorCode MatMult_MPISBAIJ(Mat A,Vec xx,Vec yy) 852 { 853 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 854 PetscErrorCode ierr; 855 PetscInt nt,mbs=a->mbs,bs=A->bs; 856 PetscScalar *x,*from,zero=0.0; 857 858 PetscFunctionBegin; 859 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 860 if (nt != A->n) { 861 SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx"); 862 } 863 ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr); 864 if (nt != A->m) { 865 SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy"); 866 } 867 868 /* diagonal part */ 869 ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr); 870 ierr = VecSet(a->slvec1b,zero);CHKERRQ(ierr); 871 872 /* subdiagonal part */ 873 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr); 874 875 /* copy x into the vec slvec0 */ 876 ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr); 877 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 878 ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 879 ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr); 880 881 ierr = VecScatterBegin(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr); 882 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 883 ierr = VecScatterEnd(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr); 884 885 /* supperdiagonal part */ 886 ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr); 887 888 PetscFunctionReturn(0); 889 } 890 891 #undef __FUNCT__ 892 #define __FUNCT__ "MatMult_MPISBAIJ_2comm" 893 PetscErrorCode MatMult_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy) 894 { 895 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 896 PetscErrorCode ierr; 897 PetscInt nt; 898 899 PetscFunctionBegin; 900 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 901 if (nt != A->n) { 902 SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx"); 903 } 904 ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr); 905 if (nt != A->m) { 906 SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy"); 907 } 908 909 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 910 /* do diagonal part */ 911 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 912 /* do supperdiagonal part */ 913 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 914 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 915 /* do subdiagonal part */ 916 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 917 ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 918 ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 919 920 PetscFunctionReturn(0); 921 } 922 923 #undef __FUNCT__ 924 #define __FUNCT__ "MatMultAdd_MPISBAIJ" 925 PetscErrorCode MatMultAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 926 { 927 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 928 PetscErrorCode ierr; 929 PetscInt mbs=a->mbs,bs=A->bs; 930 PetscScalar *x,*from,zero=0.0; 931 932 PetscFunctionBegin; 933 /* 934 PetscSynchronizedPrintf(A->comm," MatMultAdd is called ...\n"); 935 PetscSynchronizedFlush(A->comm); 936 */ 937 /* diagonal part */ 938 ierr = (*a->A->ops->multadd)(a->A,xx,yy,a->slvec1a);CHKERRQ(ierr); 939 ierr = VecSet(a->slvec1b,zero);CHKERRQ(ierr); 940 941 /* subdiagonal part */ 942 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr); 943 944 /* copy x into the vec slvec0 */ 945 ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr); 946 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 947 ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 948 ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr); 949 950 ierr = VecScatterBegin(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr); 951 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 952 ierr = VecScatterEnd(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr); 953 954 /* supperdiagonal part */ 955 ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,zz);CHKERRQ(ierr); 956 957 PetscFunctionReturn(0); 958 } 959 960 #undef __FUNCT__ 961 #define __FUNCT__ "MatMultAdd_MPISBAIJ_2comm" 962 PetscErrorCode MatMultAdd_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy,Vec zz) 963 { 964 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 965 PetscErrorCode ierr; 966 967 PetscFunctionBegin; 968 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 969 /* do diagonal part */ 970 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 971 /* do supperdiagonal part */ 972 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 973 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 974 975 /* do subdiagonal part */ 976 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 977 ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 978 ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 979 980 PetscFunctionReturn(0); 981 } 982 983 /* 984 This only works correctly for square matrices where the subblock A->A is the 985 diagonal block 986 */ 987 #undef __FUNCT__ 988 #define __FUNCT__ "MatGetDiagonal_MPISBAIJ" 989 PetscErrorCode MatGetDiagonal_MPISBAIJ(Mat A,Vec v) 990 { 991 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 992 PetscErrorCode ierr; 993 994 PetscFunctionBegin; 995 /* if (a->M != a->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */ 996 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 997 PetscFunctionReturn(0); 998 } 999 1000 #undef __FUNCT__ 1001 #define __FUNCT__ "MatScale_MPISBAIJ" 1002 PetscErrorCode MatScale_MPISBAIJ(Mat A,PetscScalar aa) 1003 { 1004 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1005 PetscErrorCode ierr; 1006 1007 PetscFunctionBegin; 1008 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 1009 ierr = MatScale(a->B,aa);CHKERRQ(ierr); 1010 PetscFunctionReturn(0); 1011 } 1012 1013 #undef __FUNCT__ 1014 #define __FUNCT__ "MatGetRow_MPISBAIJ" 1015 PetscErrorCode MatGetRow_MPISBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1016 { 1017 PetscFunctionBegin; 1018 if (matin) SETERRQ(PETSC_ERR_SUP,"MatGetRow is not supported for SBAIJ matrix format"); 1019 PetscFunctionReturn(0); 1020 } 1021 1022 #undef __FUNCT__ 1023 #define __FUNCT__ "MatRestoreRow_MPISBAIJ" 1024 PetscErrorCode MatRestoreRow_MPISBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1025 { 1026 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 1027 1028 PetscFunctionBegin; 1029 if (!baij->getrowactive) { 1030 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1031 } 1032 baij->getrowactive = PETSC_FALSE; 1033 PetscFunctionReturn(0); 1034 } 1035 1036 #undef __FUNCT__ 1037 #define __FUNCT__ "MatZeroEntries_MPISBAIJ" 1038 PetscErrorCode MatZeroEntries_MPISBAIJ(Mat A) 1039 { 1040 Mat_MPISBAIJ *l = (Mat_MPISBAIJ*)A->data; 1041 PetscErrorCode ierr; 1042 1043 PetscFunctionBegin; 1044 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 1045 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 1046 PetscFunctionReturn(0); 1047 } 1048 1049 #undef __FUNCT__ 1050 #define __FUNCT__ "MatGetInfo_MPISBAIJ" 1051 PetscErrorCode MatGetInfo_MPISBAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1052 { 1053 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)matin->data; 1054 Mat A = a->A,B = a->B; 1055 PetscErrorCode ierr; 1056 PetscReal isend[5],irecv[5]; 1057 1058 PetscFunctionBegin; 1059 info->block_size = (PetscReal)matin->bs; 1060 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1061 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1062 isend[3] = info->memory; isend[4] = info->mallocs; 1063 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1064 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1065 isend[3] += info->memory; isend[4] += info->mallocs; 1066 if (flag == MAT_LOCAL) { 1067 info->nz_used = isend[0]; 1068 info->nz_allocated = isend[1]; 1069 info->nz_unneeded = isend[2]; 1070 info->memory = isend[3]; 1071 info->mallocs = isend[4]; 1072 } else if (flag == MAT_GLOBAL_MAX) { 1073 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr); 1074 info->nz_used = irecv[0]; 1075 info->nz_allocated = irecv[1]; 1076 info->nz_unneeded = irecv[2]; 1077 info->memory = irecv[3]; 1078 info->mallocs = irecv[4]; 1079 } else if (flag == MAT_GLOBAL_SUM) { 1080 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr); 1081 info->nz_used = irecv[0]; 1082 info->nz_allocated = irecv[1]; 1083 info->nz_unneeded = irecv[2]; 1084 info->memory = irecv[3]; 1085 info->mallocs = irecv[4]; 1086 } else { 1087 SETERRQ1(PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag); 1088 } 1089 info->rows_global = (PetscReal)A->M; 1090 info->columns_global = (PetscReal)A->N; 1091 info->rows_local = (PetscReal)A->m; 1092 info->columns_local = (PetscReal)A->N; 1093 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1094 info->fill_ratio_needed = 0; 1095 info->factor_mallocs = 0; 1096 PetscFunctionReturn(0); 1097 } 1098 1099 #undef __FUNCT__ 1100 #define __FUNCT__ "MatSetOption_MPISBAIJ" 1101 PetscErrorCode MatSetOption_MPISBAIJ(Mat A,MatOption op) 1102 { 1103 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1104 PetscErrorCode ierr; 1105 1106 PetscFunctionBegin; 1107 switch (op) { 1108 case MAT_NO_NEW_NONZERO_LOCATIONS: 1109 case MAT_YES_NEW_NONZERO_LOCATIONS: 1110 case MAT_COLUMNS_UNSORTED: 1111 case MAT_COLUMNS_SORTED: 1112 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1113 case MAT_KEEP_ZEROED_ROWS: 1114 case MAT_NEW_NONZERO_LOCATION_ERR: 1115 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1116 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1117 break; 1118 case MAT_ROW_ORIENTED: 1119 a->roworiented = PETSC_TRUE; 1120 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1121 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1122 break; 1123 case MAT_ROWS_SORTED: 1124 case MAT_ROWS_UNSORTED: 1125 case MAT_YES_NEW_DIAGONALS: 1126 ierr = PetscLogInfo((A,"Info:MatSetOption_MPIBAIJ:Option ignored\n"));CHKERRQ(ierr); 1127 break; 1128 case MAT_COLUMN_ORIENTED: 1129 a->roworiented = PETSC_FALSE; 1130 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1131 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1132 break; 1133 case MAT_IGNORE_OFF_PROC_ENTRIES: 1134 a->donotstash = PETSC_TRUE; 1135 break; 1136 case MAT_NO_NEW_DIAGONALS: 1137 SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS"); 1138 case MAT_USE_HASH_TABLE: 1139 a->ht_flag = PETSC_TRUE; 1140 break; 1141 case MAT_NOT_SYMMETRIC: 1142 case MAT_NOT_STRUCTURALLY_SYMMETRIC: 1143 case MAT_HERMITIAN: 1144 SETERRQ(PETSC_ERR_SUP,"Matrix must be symmetric"); 1145 case MAT_SYMMETRIC: 1146 case MAT_STRUCTURALLY_SYMMETRIC: 1147 case MAT_NOT_HERMITIAN: 1148 case MAT_SYMMETRY_ETERNAL: 1149 case MAT_NOT_SYMMETRY_ETERNAL: 1150 break; 1151 default: 1152 SETERRQ(PETSC_ERR_SUP,"unknown option"); 1153 } 1154 PetscFunctionReturn(0); 1155 } 1156 1157 #undef __FUNCT__ 1158 #define __FUNCT__ "MatTranspose_MPISBAIJ" 1159 PetscErrorCode MatTranspose_MPISBAIJ(Mat A,Mat *B) 1160 { 1161 PetscErrorCode ierr; 1162 PetscFunctionBegin; 1163 ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr); 1164 PetscFunctionReturn(0); 1165 } 1166 1167 #undef __FUNCT__ 1168 #define __FUNCT__ "MatDiagonalScale_MPISBAIJ" 1169 PetscErrorCode MatDiagonalScale_MPISBAIJ(Mat mat,Vec ll,Vec rr) 1170 { 1171 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 1172 Mat a=baij->A, b=baij->B; 1173 PetscErrorCode ierr; 1174 PetscInt nv,m,n; 1175 PetscTruth flg; 1176 1177 PetscFunctionBegin; 1178 if (ll != rr){ 1179 ierr = VecEqual(ll,rr,&flg);CHKERRQ(ierr); 1180 if (!flg) 1181 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be same\n"); 1182 } 1183 if (!ll) PetscFunctionReturn(0); 1184 1185 ierr = MatGetLocalSize(mat,&m,&n);CHKERRQ(ierr); 1186 if (m != n) SETERRQ2(PETSC_ERR_ARG_SIZ,"For symmetric format, local size %d %d must be same",m,n); 1187 1188 ierr = VecGetLocalSize(rr,&nv);CHKERRQ(ierr); 1189 if (nv!=n) SETERRQ(PETSC_ERR_ARG_SIZ,"Left and right vector non-conforming local size"); 1190 1191 ierr = VecScatterBegin(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr); 1192 1193 /* left diagonalscale the off-diagonal part */ 1194 ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr); 1195 1196 /* scale the diagonal part */ 1197 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1198 1199 /* right diagonalscale the off-diagonal part */ 1200 ierr = VecScatterEnd(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr); 1201 ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr); 1202 PetscFunctionReturn(0); 1203 } 1204 1205 #undef __FUNCT__ 1206 #define __FUNCT__ "MatPrintHelp_MPISBAIJ" 1207 PetscErrorCode MatPrintHelp_MPISBAIJ(Mat A) 1208 { 1209 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1210 MPI_Comm comm = A->comm; 1211 static PetscTruth called = PETSC_FALSE; 1212 PetscErrorCode ierr; 1213 1214 PetscFunctionBegin; 1215 if (!a->rank) { 1216 ierr = MatPrintHelp_SeqSBAIJ(a->A);CHKERRQ(ierr); 1217 } 1218 if (called) {PetscFunctionReturn(0);} else called = PETSC_TRUE; 1219 ierr = (*PetscHelpPrintf)(comm," Options for MATMPISBAIJ matrix format (the defaults):\n");CHKERRQ(ierr); 1220 ierr = (*PetscHelpPrintf)(comm," -mat_use_hash_table <factor>: Use hashtable for efficient matrix assembly\n");CHKERRQ(ierr); 1221 PetscFunctionReturn(0); 1222 } 1223 1224 #undef __FUNCT__ 1225 #define __FUNCT__ "MatSetUnfactored_MPISBAIJ" 1226 PetscErrorCode MatSetUnfactored_MPISBAIJ(Mat A) 1227 { 1228 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1229 PetscErrorCode ierr; 1230 1231 PetscFunctionBegin; 1232 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1233 PetscFunctionReturn(0); 1234 } 1235 1236 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat,MatDuplicateOption,Mat *); 1237 1238 #undef __FUNCT__ 1239 #define __FUNCT__ "MatEqual_MPISBAIJ" 1240 PetscErrorCode MatEqual_MPISBAIJ(Mat A,Mat B,PetscTruth *flag) 1241 { 1242 Mat_MPISBAIJ *matB = (Mat_MPISBAIJ*)B->data,*matA = (Mat_MPISBAIJ*)A->data; 1243 Mat a,b,c,d; 1244 PetscTruth flg; 1245 PetscErrorCode ierr; 1246 1247 PetscFunctionBegin; 1248 a = matA->A; b = matA->B; 1249 c = matB->A; d = matB->B; 1250 1251 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1252 if (flg) { 1253 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1254 } 1255 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr); 1256 PetscFunctionReturn(0); 1257 } 1258 1259 #undef __FUNCT__ 1260 #define __FUNCT__ "MatSetUpPreallocation_MPISBAIJ" 1261 PetscErrorCode MatSetUpPreallocation_MPISBAIJ(Mat A) 1262 { 1263 PetscErrorCode ierr; 1264 1265 PetscFunctionBegin; 1266 ierr = MatMPISBAIJSetPreallocation(A,1,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1267 PetscFunctionReturn(0); 1268 } 1269 1270 #undef __FUNCT__ 1271 #define __FUNCT__ "MatGetSubMatrices_MPISBAIJ" 1272 PetscErrorCode MatGetSubMatrices_MPISBAIJ(Mat A,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat *B[]) 1273 { 1274 PetscErrorCode ierr; 1275 PetscInt i; 1276 PetscTruth flg; 1277 1278 PetscFunctionBegin; 1279 for (i=0; i<n; i++) { 1280 ierr = ISEqual(irow[i],icol[i],&flg);CHKERRQ(ierr); 1281 if (!flg) { 1282 SETERRQ(PETSC_ERR_SUP,"Can only get symmetric submatrix for MPISBAIJ matrices"); 1283 } 1284 } 1285 ierr = MatGetSubMatrices_MPIBAIJ(A,n,irow,icol,scall,B);CHKERRQ(ierr); 1286 PetscFunctionReturn(0); 1287 } 1288 1289 1290 /* -------------------------------------------------------------------*/ 1291 static struct _MatOps MatOps_Values = { 1292 MatSetValues_MPISBAIJ, 1293 MatGetRow_MPISBAIJ, 1294 MatRestoreRow_MPISBAIJ, 1295 MatMult_MPISBAIJ, 1296 /* 4*/ MatMultAdd_MPISBAIJ, 1297 MatMult_MPISBAIJ, /* transpose versions are same as non-transpose */ 1298 MatMultAdd_MPISBAIJ, 1299 0, 1300 0, 1301 0, 1302 /*10*/ 0, 1303 0, 1304 0, 1305 MatRelax_MPISBAIJ, 1306 MatTranspose_MPISBAIJ, 1307 /*15*/ MatGetInfo_MPISBAIJ, 1308 MatEqual_MPISBAIJ, 1309 MatGetDiagonal_MPISBAIJ, 1310 MatDiagonalScale_MPISBAIJ, 1311 MatNorm_MPISBAIJ, 1312 /*20*/ MatAssemblyBegin_MPISBAIJ, 1313 MatAssemblyEnd_MPISBAIJ, 1314 0, 1315 MatSetOption_MPISBAIJ, 1316 MatZeroEntries_MPISBAIJ, 1317 /*25*/ 0, 1318 0, 1319 0, 1320 0, 1321 0, 1322 /*30*/ MatSetUpPreallocation_MPISBAIJ, 1323 0, 1324 0, 1325 0, 1326 0, 1327 /*35*/ MatDuplicate_MPISBAIJ, 1328 0, 1329 0, 1330 0, 1331 0, 1332 /*40*/ 0, 1333 MatGetSubMatrices_MPISBAIJ, 1334 MatIncreaseOverlap_MPISBAIJ, 1335 MatGetValues_MPISBAIJ, 1336 0, 1337 /*45*/ MatPrintHelp_MPISBAIJ, 1338 MatScale_MPISBAIJ, 1339 0, 1340 0, 1341 0, 1342 /*50*/ 0, 1343 0, 1344 0, 1345 0, 1346 0, 1347 /*55*/ 0, 1348 0, 1349 MatSetUnfactored_MPISBAIJ, 1350 0, 1351 MatSetValuesBlocked_MPISBAIJ, 1352 /*60*/ 0, 1353 0, 1354 0, 1355 MatGetPetscMaps_Petsc, 1356 0, 1357 /*65*/ 0, 1358 0, 1359 0, 1360 0, 1361 0, 1362 /*70*/ MatGetRowMax_MPISBAIJ, 1363 0, 1364 0, 1365 0, 1366 0, 1367 /*75*/ 0, 1368 0, 1369 0, 1370 0, 1371 0, 1372 /*80*/ 0, 1373 0, 1374 0, 1375 0, 1376 MatLoad_MPISBAIJ, 1377 /*85*/ 0, 1378 0, 1379 0, 1380 0, 1381 0, 1382 /*90*/ 0, 1383 0, 1384 0, 1385 0, 1386 0, 1387 /*95*/ 0, 1388 0, 1389 0, 1390 0}; 1391 1392 1393 EXTERN_C_BEGIN 1394 #undef __FUNCT__ 1395 #define __FUNCT__ "MatGetDiagonalBlock_MPISBAIJ" 1396 PetscErrorCode PETSCMAT_DLLEXPORT MatGetDiagonalBlock_MPISBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a) 1397 { 1398 PetscFunctionBegin; 1399 *a = ((Mat_MPISBAIJ *)A->data)->A; 1400 *iscopy = PETSC_FALSE; 1401 PetscFunctionReturn(0); 1402 } 1403 EXTERN_C_END 1404 1405 EXTERN_C_BEGIN 1406 #undef __FUNCT__ 1407 #define __FUNCT__ "MatMPISBAIJSetPreallocation_MPISBAIJ" 1408 PetscErrorCode PETSCMAT_DLLEXPORT MatMPISBAIJSetPreallocation_MPISBAIJ(Mat B,PetscInt bs,PetscInt d_nz,PetscInt *d_nnz,PetscInt o_nz,PetscInt *o_nnz) 1409 { 1410 Mat_MPISBAIJ *b; 1411 PetscErrorCode ierr; 1412 PetscInt i,mbs,Mbs; 1413 1414 PetscFunctionBegin; 1415 ierr = PetscOptionsGetInt(B->prefix,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 1416 1417 if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive"); 1418 if (d_nz == PETSC_DECIDE || d_nz == PETSC_DEFAULT) d_nz = 3; 1419 if (o_nz == PETSC_DECIDE || o_nz == PETSC_DEFAULT) o_nz = 1; 1420 if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz); 1421 if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz); 1422 if (d_nnz) { 1423 for (i=0; i<B->m/bs; i++) { 1424 if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than -1: local row %D value %D",i,d_nnz[i]); 1425 } 1426 } 1427 if (o_nnz) { 1428 for (i=0; i<B->m/bs; i++) { 1429 if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than -1: local row %D value %D",i,o_nnz[i]); 1430 } 1431 } 1432 B->preallocated = PETSC_TRUE; 1433 ierr = PetscSplitOwnershipBlock(B->comm,bs,&B->m,&B->M);CHKERRQ(ierr); 1434 ierr = PetscSplitOwnershipBlock(B->comm,bs,&B->n,&B->N);CHKERRQ(ierr); 1435 ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr); 1436 ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->cmap);CHKERRQ(ierr); 1437 1438 b = (Mat_MPISBAIJ*)B->data; 1439 mbs = B->m/bs; 1440 Mbs = B->M/bs; 1441 if (mbs*bs != B->m) { 1442 SETERRQ2(PETSC_ERR_ARG_SIZ,"No of local rows %D must be divisible by blocksize %D",B->m,bs); 1443 } 1444 1445 B->bs = bs; 1446 b->bs2 = bs*bs; 1447 b->mbs = mbs; 1448 b->nbs = mbs; 1449 b->Mbs = Mbs; 1450 b->Nbs = Mbs; 1451 1452 ierr = MPI_Allgather(&b->mbs,1,MPIU_INT,b->rowners+1,1,MPIU_INT,B->comm);CHKERRQ(ierr); 1453 b->rowners[0] = 0; 1454 for (i=2; i<=b->size; i++) { 1455 b->rowners[i] += b->rowners[i-1]; 1456 } 1457 b->rstart = b->rowners[b->rank]; 1458 b->rend = b->rowners[b->rank+1]; 1459 b->cstart = b->rstart; 1460 b->cend = b->rend; 1461 for (i=0; i<=b->size; i++) { 1462 b->rowners_bs[i] = b->rowners[i]*bs; 1463 } 1464 b->rstart_bs = b-> rstart*bs; 1465 b->rend_bs = b->rend*bs; 1466 1467 b->cstart_bs = b->cstart*bs; 1468 b->cend_bs = b->cend*bs; 1469 1470 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 1471 ierr = MatSetSizes(b->A,B->m,B->m,B->m,B->m);CHKERRQ(ierr); 1472 ierr = MatSetType(b->A,MATSEQSBAIJ);CHKERRQ(ierr); 1473 ierr = MatSeqSBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr); 1474 ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr); 1475 1476 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 1477 ierr = MatSetSizes(b->B,B->m,B->M,B->m,B->M);CHKERRQ(ierr); 1478 ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr); 1479 ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr); 1480 ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr); 1481 1482 /* build cache for off array entries formed */ 1483 ierr = MatStashCreate_Private(B->comm,bs,&B->bstash);CHKERRQ(ierr); 1484 1485 PetscFunctionReturn(0); 1486 } 1487 EXTERN_C_END 1488 1489 /*MC 1490 MATMPISBAIJ - MATMPISBAIJ = "mpisbaij" - A matrix type to be used for distributed symmetric sparse block matrices, 1491 based on block compressed sparse row format. Only the upper triangular portion of the matrix is stored. 1492 1493 Options Database Keys: 1494 . -mat_type mpisbaij - sets the matrix type to "mpisbaij" during a call to MatSetFromOptions() 1495 1496 Level: beginner 1497 1498 .seealso: MatCreateMPISBAIJ 1499 M*/ 1500 1501 EXTERN_C_BEGIN 1502 #undef __FUNCT__ 1503 #define __FUNCT__ "MatCreate_MPISBAIJ" 1504 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPISBAIJ(Mat B) 1505 { 1506 Mat_MPISBAIJ *b; 1507 PetscErrorCode ierr; 1508 PetscTruth flg; 1509 1510 PetscFunctionBegin; 1511 1512 ierr = PetscNew(Mat_MPISBAIJ,&b);CHKERRQ(ierr); 1513 B->data = (void*)b; 1514 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1515 1516 B->ops->destroy = MatDestroy_MPISBAIJ; 1517 B->ops->view = MatView_MPISBAIJ; 1518 B->mapping = 0; 1519 B->factor = 0; 1520 B->assembled = PETSC_FALSE; 1521 1522 B->insertmode = NOT_SET_VALUES; 1523 ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr); 1524 ierr = MPI_Comm_size(B->comm,&b->size);CHKERRQ(ierr); 1525 1526 /* build local table of row and column ownerships */ 1527 ierr = PetscMalloc(3*(b->size+2)*sizeof(PetscInt),&b->rowners);CHKERRQ(ierr); 1528 b->cowners = b->rowners + b->size + 2; 1529 b->rowners_bs = b->cowners + b->size + 2; 1530 ierr = PetscLogObjectMemory(B,3*(b->size+2)*sizeof(PetscInt)+sizeof(struct _p_Mat)+sizeof(Mat_MPISBAIJ));CHKERRQ(ierr); 1531 1532 /* build cache for off array entries formed */ 1533 ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr); 1534 b->donotstash = PETSC_FALSE; 1535 b->colmap = PETSC_NULL; 1536 b->garray = PETSC_NULL; 1537 b->roworiented = PETSC_TRUE; 1538 1539 #if defined(PETSC_USE_MAT_SINGLE) 1540 /* stuff for MatSetValues_XXX in single precision */ 1541 b->setvalueslen = 0; 1542 b->setvaluescopy = PETSC_NULL; 1543 #endif 1544 1545 /* stuff used in block assembly */ 1546 b->barray = 0; 1547 1548 /* stuff used for matrix vector multiply */ 1549 b->lvec = 0; 1550 b->Mvctx = 0; 1551 b->slvec0 = 0; 1552 b->slvec0b = 0; 1553 b->slvec1 = 0; 1554 b->slvec1a = 0; 1555 b->slvec1b = 0; 1556 b->sMvctx = 0; 1557 1558 /* stuff for MatGetRow() */ 1559 b->rowindices = 0; 1560 b->rowvalues = 0; 1561 b->getrowactive = PETSC_FALSE; 1562 1563 /* hash table stuff */ 1564 b->ht = 0; 1565 b->hd = 0; 1566 b->ht_size = 0; 1567 b->ht_flag = PETSC_FALSE; 1568 b->ht_fact = 0; 1569 b->ht_total_ct = 0; 1570 b->ht_insert_ct = 0; 1571 1572 ierr = PetscOptionsHasName(B->prefix,"-mat_use_hash_table",&flg);CHKERRQ(ierr); 1573 if (flg) { 1574 PetscReal fact = 1.39; 1575 ierr = MatSetOption(B,MAT_USE_HASH_TABLE);CHKERRQ(ierr); 1576 ierr = PetscOptionsGetReal(B->prefix,"-mat_use_hash_table",&fact,PETSC_NULL);CHKERRQ(ierr); 1577 if (fact <= 1.0) fact = 1.39; 1578 ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr); 1579 ierr = PetscLogInfo((0,"MatCreateMPISBAIJ:Hash table Factor used %5.2f\n",fact));CHKERRQ(ierr); 1580 } 1581 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 1582 "MatStoreValues_MPISBAIJ", 1583 MatStoreValues_MPISBAIJ);CHKERRQ(ierr); 1584 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 1585 "MatRetrieveValues_MPISBAIJ", 1586 MatRetrieveValues_MPISBAIJ);CHKERRQ(ierr); 1587 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 1588 "MatGetDiagonalBlock_MPISBAIJ", 1589 MatGetDiagonalBlock_MPISBAIJ);CHKERRQ(ierr); 1590 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPISBAIJSetPreallocation_C", 1591 "MatMPISBAIJSetPreallocation_MPISBAIJ", 1592 MatMPISBAIJSetPreallocation_MPISBAIJ);CHKERRQ(ierr); 1593 B->symmetric = PETSC_TRUE; 1594 B->structurally_symmetric = PETSC_TRUE; 1595 B->symmetric_set = PETSC_TRUE; 1596 B->structurally_symmetric_set = PETSC_TRUE; 1597 PetscFunctionReturn(0); 1598 } 1599 EXTERN_C_END 1600 1601 /*MC 1602 MATSBAIJ - MATSBAIJ = "sbaij" - A matrix type to be used for symmetric block sparse matrices. 1603 1604 This matrix type is identical to MATSEQSBAIJ when constructed with a single process communicator, 1605 and MATMPISBAIJ otherwise. 1606 1607 Options Database Keys: 1608 . -mat_type sbaij - sets the matrix type to "sbaij" during a call to MatSetFromOptions() 1609 1610 Level: beginner 1611 1612 .seealso: MatCreateMPISBAIJ,MATSEQSBAIJ,MATMPISBAIJ 1613 M*/ 1614 1615 EXTERN_C_BEGIN 1616 #undef __FUNCT__ 1617 #define __FUNCT__ "MatCreate_SBAIJ" 1618 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_SBAIJ(Mat A) 1619 { 1620 PetscErrorCode ierr; 1621 PetscMPIInt size; 1622 1623 PetscFunctionBegin; 1624 ierr = PetscObjectChangeTypeName((PetscObject)A,MATSBAIJ);CHKERRQ(ierr); 1625 ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr); 1626 if (size == 1) { 1627 ierr = MatSetType(A,MATSEQSBAIJ);CHKERRQ(ierr); 1628 } else { 1629 ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr); 1630 } 1631 PetscFunctionReturn(0); 1632 } 1633 EXTERN_C_END 1634 1635 #undef __FUNCT__ 1636 #define __FUNCT__ "MatMPISBAIJSetPreallocation" 1637 /*@C 1638 MatMPISBAIJSetPreallocation - For good matrix assembly performance 1639 the user should preallocate the matrix storage by setting the parameters 1640 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 1641 performance can be increased by more than a factor of 50. 1642 1643 Collective on Mat 1644 1645 Input Parameters: 1646 + A - the matrix 1647 . bs - size of blockk 1648 . d_nz - number of block nonzeros per block row in diagonal portion of local 1649 submatrix (same for all local rows) 1650 . d_nnz - array containing the number of block nonzeros in the various block rows 1651 in the upper triangular and diagonal part of the in diagonal portion of the local 1652 (possibly different for each block row) or PETSC_NULL. You must leave room 1653 for the diagonal entry even if it is zero. 1654 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 1655 submatrix (same for all local rows). 1656 - o_nnz - array containing the number of nonzeros in the various block rows of the 1657 off-diagonal portion of the local submatrix (possibly different for 1658 each block row) or PETSC_NULL. 1659 1660 1661 Options Database Keys: 1662 . -mat_no_unroll - uses code that does not unroll the loops in the 1663 block calculations (much slower) 1664 . -mat_block_size - size of the blocks to use 1665 1666 Notes: 1667 1668 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 1669 than it must be used on all processors that share the object for that argument. 1670 1671 If the *_nnz parameter is given then the *_nz parameter is ignored 1672 1673 Storage Information: 1674 For a square global matrix we define each processor's diagonal portion 1675 to be its local rows and the corresponding columns (a square submatrix); 1676 each processor's off-diagonal portion encompasses the remainder of the 1677 local matrix (a rectangular submatrix). 1678 1679 The user can specify preallocated storage for the diagonal part of 1680 the local submatrix with either d_nz or d_nnz (not both). Set 1681 d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 1682 memory allocation. Likewise, specify preallocated storage for the 1683 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 1684 1685 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 1686 the figure below we depict these three local rows and all columns (0-11). 1687 1688 .vb 1689 0 1 2 3 4 5 6 7 8 9 10 11 1690 ------------------- 1691 row 3 | o o o d d d o o o o o o 1692 row 4 | o o o d d d o o o o o o 1693 row 5 | o o o d d d o o o o o o 1694 ------------------- 1695 .ve 1696 1697 Thus, any entries in the d locations are stored in the d (diagonal) 1698 submatrix, and any entries in the o locations are stored in the 1699 o (off-diagonal) submatrix. Note that the d matrix is stored in 1700 MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format. 1701 1702 Now d_nz should indicate the number of block nonzeros per row in the upper triangular 1703 plus the diagonal part of the d matrix, 1704 and o_nz should indicate the number of block nonzeros per row in the o matrix. 1705 In general, for PDE problems in which most nonzeros are near the diagonal, 1706 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 1707 or you will get TERRIBLE performance; see the users' manual chapter on 1708 matrices. 1709 1710 Level: intermediate 1711 1712 .keywords: matrix, block, aij, compressed row, sparse, parallel 1713 1714 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPIBAIJ() 1715 @*/ 1716 PetscErrorCode PETSCMAT_DLLEXPORT MatMPISBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 1717 { 1718 PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]); 1719 1720 PetscFunctionBegin; 1721 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPISBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 1722 if (f) { 1723 ierr = (*f)(B,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 1724 } 1725 PetscFunctionReturn(0); 1726 } 1727 1728 #undef __FUNCT__ 1729 #define __FUNCT__ "MatCreateMPISBAIJ" 1730 /*@C 1731 MatCreateMPISBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format 1732 (block compressed row). For good matrix assembly performance 1733 the user should preallocate the matrix storage by setting the parameters 1734 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 1735 performance can be increased by more than a factor of 50. 1736 1737 Collective on MPI_Comm 1738 1739 Input Parameters: 1740 + comm - MPI communicator 1741 . bs - size of blockk 1742 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 1743 This value should be the same as the local size used in creating the 1744 y vector for the matrix-vector product y = Ax. 1745 . n - number of local columns (or PETSC_DECIDE to have calculated if N is given) 1746 This value should be the same as the local size used in creating the 1747 x vector for the matrix-vector product y = Ax. 1748 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 1749 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 1750 . d_nz - number of block nonzeros per block row in diagonal portion of local 1751 submatrix (same for all local rows) 1752 . d_nnz - array containing the number of block nonzeros in the various block rows 1753 in the upper triangular portion of the in diagonal portion of the local 1754 (possibly different for each block block row) or PETSC_NULL. 1755 You must leave room for the diagonal entry even if it is zero. 1756 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 1757 submatrix (same for all local rows). 1758 - o_nnz - array containing the number of nonzeros in the various block rows of the 1759 off-diagonal portion of the local submatrix (possibly different for 1760 each block row) or PETSC_NULL. 1761 1762 Output Parameter: 1763 . A - the matrix 1764 1765 Options Database Keys: 1766 . -mat_no_unroll - uses code that does not unroll the loops in the 1767 block calculations (much slower) 1768 . -mat_block_size - size of the blocks to use 1769 . -mat_mpi - use the parallel matrix data structures even on one processor 1770 (defaults to using SeqBAIJ format on one processor) 1771 1772 Notes: 1773 The number of rows and columns must be divisible by blocksize. 1774 1775 The user MUST specify either the local or global matrix dimensions 1776 (possibly both). 1777 1778 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 1779 than it must be used on all processors that share the object for that argument. 1780 1781 If the *_nnz parameter is given then the *_nz parameter is ignored 1782 1783 Storage Information: 1784 For a square global matrix we define each processor's diagonal portion 1785 to be its local rows and the corresponding columns (a square submatrix); 1786 each processor's off-diagonal portion encompasses the remainder of the 1787 local matrix (a rectangular submatrix). 1788 1789 The user can specify preallocated storage for the diagonal part of 1790 the local submatrix with either d_nz or d_nnz (not both). Set 1791 d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 1792 memory allocation. Likewise, specify preallocated storage for the 1793 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 1794 1795 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 1796 the figure below we depict these three local rows and all columns (0-11). 1797 1798 .vb 1799 0 1 2 3 4 5 6 7 8 9 10 11 1800 ------------------- 1801 row 3 | o o o d d d o o o o o o 1802 row 4 | o o o d d d o o o o o o 1803 row 5 | o o o d d d o o o o o o 1804 ------------------- 1805 .ve 1806 1807 Thus, any entries in the d locations are stored in the d (diagonal) 1808 submatrix, and any entries in the o locations are stored in the 1809 o (off-diagonal) submatrix. Note that the d matrix is stored in 1810 MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format. 1811 1812 Now d_nz should indicate the number of block nonzeros per row in the upper triangular 1813 plus the diagonal part of the d matrix, 1814 and o_nz should indicate the number of block nonzeros per row in the o matrix. 1815 In general, for PDE problems in which most nonzeros are near the diagonal, 1816 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 1817 or you will get TERRIBLE performance; see the users' manual chapter on 1818 matrices. 1819 1820 Level: intermediate 1821 1822 .keywords: matrix, block, aij, compressed row, sparse, parallel 1823 1824 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPIBAIJ() 1825 @*/ 1826 1827 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPISBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 1828 { 1829 PetscErrorCode ierr; 1830 PetscMPIInt size; 1831 1832 PetscFunctionBegin; 1833 ierr = MatCreate(comm,A);CHKERRQ(ierr); 1834 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 1835 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1836 if (size > 1) { 1837 ierr = MatSetType(*A,MATMPISBAIJ);CHKERRQ(ierr); 1838 ierr = MatMPISBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 1839 } else { 1840 ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr); 1841 ierr = MatSeqSBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr); 1842 } 1843 PetscFunctionReturn(0); 1844 } 1845 1846 1847 #undef __FUNCT__ 1848 #define __FUNCT__ "MatDuplicate_MPISBAIJ" 1849 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 1850 { 1851 Mat mat; 1852 Mat_MPISBAIJ *a,*oldmat = (Mat_MPISBAIJ*)matin->data; 1853 PetscErrorCode ierr; 1854 PetscInt len=0,nt,bs=matin->bs,mbs=oldmat->mbs; 1855 PetscScalar *array; 1856 1857 PetscFunctionBegin; 1858 *newmat = 0; 1859 ierr = MatCreate(matin->comm,&mat);CHKERRQ(ierr); 1860 ierr = MatSetSizes(mat,matin->m,matin->n,matin->M,matin->N);CHKERRQ(ierr); 1861 ierr = MatSetType(mat,matin->type_name);CHKERRQ(ierr); 1862 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 1863 1864 mat->factor = matin->factor; 1865 mat->preallocated = PETSC_TRUE; 1866 mat->assembled = PETSC_TRUE; 1867 mat->insertmode = NOT_SET_VALUES; 1868 1869 a = (Mat_MPISBAIJ*)mat->data; 1870 mat->bs = matin->bs; 1871 a->bs2 = oldmat->bs2; 1872 a->mbs = oldmat->mbs; 1873 a->nbs = oldmat->nbs; 1874 a->Mbs = oldmat->Mbs; 1875 a->Nbs = oldmat->Nbs; 1876 1877 a->rstart = oldmat->rstart; 1878 a->rend = oldmat->rend; 1879 a->cstart = oldmat->cstart; 1880 a->cend = oldmat->cend; 1881 a->size = oldmat->size; 1882 a->rank = oldmat->rank; 1883 a->donotstash = oldmat->donotstash; 1884 a->roworiented = oldmat->roworiented; 1885 a->rowindices = 0; 1886 a->rowvalues = 0; 1887 a->getrowactive = PETSC_FALSE; 1888 a->barray = 0; 1889 a->rstart_bs = oldmat->rstart_bs; 1890 a->rend_bs = oldmat->rend_bs; 1891 a->cstart_bs = oldmat->cstart_bs; 1892 a->cend_bs = oldmat->cend_bs; 1893 1894 /* hash table stuff */ 1895 a->ht = 0; 1896 a->hd = 0; 1897 a->ht_size = 0; 1898 a->ht_flag = oldmat->ht_flag; 1899 a->ht_fact = oldmat->ht_fact; 1900 a->ht_total_ct = 0; 1901 a->ht_insert_ct = 0; 1902 1903 ierr = PetscMemcpy(a->rowners,oldmat->rowners,3*(a->size+2)*sizeof(PetscInt));CHKERRQ(ierr); 1904 ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr); 1905 ierr = MatStashCreate_Private(matin->comm,matin->bs,&mat->bstash);CHKERRQ(ierr); 1906 if (oldmat->colmap) { 1907 #if defined (PETSC_USE_CTABLE) 1908 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 1909 #else 1910 ierr = PetscMalloc((a->Nbs)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr); 1911 ierr = PetscLogObjectMemory(mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr); 1912 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr); 1913 #endif 1914 } else a->colmap = 0; 1915 1916 if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) { 1917 ierr = PetscMalloc(len*sizeof(PetscInt),&a->garray);CHKERRQ(ierr); 1918 ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr); 1919 ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); 1920 } else a->garray = 0; 1921 1922 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 1923 ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr); 1924 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 1925 ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr); 1926 1927 ierr = VecDuplicate(oldmat->slvec0,&a->slvec0);CHKERRQ(ierr); 1928 ierr = PetscLogObjectParent(mat,a->slvec0);CHKERRQ(ierr); 1929 ierr = VecDuplicate(oldmat->slvec1,&a->slvec1);CHKERRQ(ierr); 1930 ierr = PetscLogObjectParent(mat,a->slvec1);CHKERRQ(ierr); 1931 1932 ierr = VecGetLocalSize(a->slvec1,&nt);CHKERRQ(ierr); 1933 ierr = VecGetArray(a->slvec1,&array);CHKERRQ(ierr); 1934 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,bs*mbs,array,&a->slvec1a);CHKERRQ(ierr); 1935 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,nt-bs*mbs,array+bs*mbs,&a->slvec1b);CHKERRQ(ierr); 1936 ierr = VecRestoreArray(a->slvec1,&array);CHKERRQ(ierr); 1937 ierr = VecGetArray(a->slvec0,&array);CHKERRQ(ierr); 1938 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,nt-bs*mbs,array+bs*mbs,&a->slvec0b);CHKERRQ(ierr); 1939 ierr = VecRestoreArray(a->slvec0,&array);CHKERRQ(ierr); 1940 ierr = PetscLogObjectParent(mat,a->slvec0);CHKERRQ(ierr); 1941 ierr = PetscLogObjectParent(mat,a->slvec1);CHKERRQ(ierr); 1942 ierr = PetscLogObjectParent(mat,a->slvec0b);CHKERRQ(ierr); 1943 ierr = PetscLogObjectParent(mat,a->slvec1a);CHKERRQ(ierr); 1944 ierr = PetscLogObjectParent(mat,a->slvec1b);CHKERRQ(ierr); 1945 1946 /* ierr = VecScatterCopy(oldmat->sMvctx,&a->sMvctx); - not written yet, replaced by the lazy trick: */ 1947 ierr = PetscObjectReference((PetscObject)oldmat->sMvctx);CHKERRQ(ierr); 1948 a->sMvctx = oldmat->sMvctx; 1949 ierr = PetscLogObjectParent(mat,a->sMvctx);CHKERRQ(ierr); 1950 1951 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 1952 ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 1953 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 1954 ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr); 1955 ierr = PetscFListDuplicate(mat->qlist,&matin->qlist);CHKERRQ(ierr); 1956 *newmat = mat; 1957 PetscFunctionReturn(0); 1958 } 1959 1960 #include "petscsys.h" 1961 1962 #undef __FUNCT__ 1963 #define __FUNCT__ "MatLoad_MPISBAIJ" 1964 PetscErrorCode MatLoad_MPISBAIJ(PetscViewer viewer, MatType type,Mat *newmat) 1965 { 1966 Mat A; 1967 PetscErrorCode ierr; 1968 PetscInt i,nz,j,rstart,rend; 1969 PetscScalar *vals,*buf; 1970 MPI_Comm comm = ((PetscObject)viewer)->comm; 1971 MPI_Status status; 1972 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag,*sndcounts = 0,*browners,maxnz,*rowners; 1973 PetscInt header[4],*rowlengths = 0,M,N,m,*cols; 1974 PetscInt *locrowlens,*procsnz = 0,jj,*mycols,*ibuf; 1975 PetscInt bs=1,Mbs,mbs,extra_rows; 1976 PetscInt *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount; 1977 PetscInt dcount,kmax,k,nzcount,tmp; 1978 int fd; 1979 1980 PetscFunctionBegin; 1981 ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 1982 1983 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1984 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 1985 if (!rank) { 1986 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1987 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 1988 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 1989 if (header[3] < 0) { 1990 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as MPISBAIJ"); 1991 } 1992 } 1993 1994 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 1995 M = header[1]; N = header[2]; 1996 1997 if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices"); 1998 1999 /* 2000 This code adds extra rows to make sure the number of rows is 2001 divisible by the blocksize 2002 */ 2003 Mbs = M/bs; 2004 extra_rows = bs - M + bs*(Mbs); 2005 if (extra_rows == bs) extra_rows = 0; 2006 else Mbs++; 2007 if (extra_rows &&!rank) { 2008 ierr = PetscLogInfo((0,"MatLoad_MPISBAIJ:Padding loaded matrix to match blocksize\n"));CHKERRQ(ierr); 2009 } 2010 2011 /* determine ownership of all rows */ 2012 mbs = Mbs/size + ((Mbs % size) > rank); 2013 m = mbs*bs; 2014 ierr = PetscMalloc(2*(size+2)*sizeof(PetscMPIInt),&rowners);CHKERRQ(ierr); 2015 browners = rowners + size + 1; 2016 ierr = MPI_Allgather(&mbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 2017 rowners[0] = 0; 2018 for (i=2; i<=size; i++) rowners[i] += rowners[i-1]; 2019 for (i=0; i<=size; i++) browners[i] = rowners[i]*bs; 2020 rstart = rowners[rank]; 2021 rend = rowners[rank+1]; 2022 2023 /* distribute row lengths to all processors */ 2024 ierr = PetscMalloc((rend-rstart)*bs*sizeof(PetscInt),&locrowlens);CHKERRQ(ierr); 2025 if (!rank) { 2026 ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 2027 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 2028 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 2029 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&sndcounts);CHKERRQ(ierr); 2030 for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i]; 2031 ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr); 2032 ierr = PetscFree(sndcounts);CHKERRQ(ierr); 2033 } else { 2034 ierr = MPI_Scatterv(0,0,0,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr); 2035 } 2036 2037 if (!rank) { /* procs[0] */ 2038 /* calculate the number of nonzeros on each processor */ 2039 ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 2040 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 2041 for (i=0; i<size; i++) { 2042 for (j=rowners[i]*bs; j< rowners[i+1]*bs; j++) { 2043 procsnz[i] += rowlengths[j]; 2044 } 2045 } 2046 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2047 2048 /* determine max buffer needed and allocate it */ 2049 maxnz = 0; 2050 for (i=0; i<size; i++) { 2051 maxnz = PetscMax(maxnz,procsnz[i]); 2052 } 2053 ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 2054 2055 /* read in my part of the matrix column indices */ 2056 nz = procsnz[0]; 2057 ierr = PetscMalloc(nz*sizeof(PetscInt),&ibuf);CHKERRQ(ierr); 2058 mycols = ibuf; 2059 if (size == 1) nz -= extra_rows; 2060 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 2061 if (size == 1) for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; } 2062 2063 /* read in every ones (except the last) and ship off */ 2064 for (i=1; i<size-1; i++) { 2065 nz = procsnz[i]; 2066 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2067 ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2068 } 2069 /* read in the stuff for the last proc */ 2070 if (size != 1) { 2071 nz = procsnz[size-1] - extra_rows; /* the extra rows are not on the disk */ 2072 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2073 for (i=0; i<extra_rows; i++) cols[nz+i] = M+i; 2074 ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr); 2075 } 2076 ierr = PetscFree(cols);CHKERRQ(ierr); 2077 } else { /* procs[i], i>0 */ 2078 /* determine buffer space needed for message */ 2079 nz = 0; 2080 for (i=0; i<m; i++) { 2081 nz += locrowlens[i]; 2082 } 2083 ierr = PetscMalloc(nz*sizeof(PetscInt),&ibuf);CHKERRQ(ierr); 2084 mycols = ibuf; 2085 /* receive message of column indices*/ 2086 ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 2087 ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr); 2088 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2089 } 2090 2091 /* loop over local rows, determining number of off diagonal entries */ 2092 ierr = PetscMalloc(2*(rend-rstart+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr); 2093 odlens = dlens + (rend-rstart); 2094 ierr = PetscMalloc(3*Mbs*sizeof(PetscInt),&mask);CHKERRQ(ierr); 2095 ierr = PetscMemzero(mask,3*Mbs*sizeof(PetscInt));CHKERRQ(ierr); 2096 masked1 = mask + Mbs; 2097 masked2 = masked1 + Mbs; 2098 rowcount = 0; nzcount = 0; 2099 for (i=0; i<mbs; i++) { 2100 dcount = 0; 2101 odcount = 0; 2102 for (j=0; j<bs; j++) { 2103 kmax = locrowlens[rowcount]; 2104 for (k=0; k<kmax; k++) { 2105 tmp = mycols[nzcount++]/bs; /* block col. index */ 2106 if (!mask[tmp]) { 2107 mask[tmp] = 1; 2108 if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; /* entry in off-diag portion */ 2109 else masked1[dcount++] = tmp; /* entry in diag portion */ 2110 } 2111 } 2112 rowcount++; 2113 } 2114 2115 dlens[i] = dcount; /* d_nzz[i] */ 2116 odlens[i] = odcount; /* o_nzz[i] */ 2117 2118 /* zero out the mask elements we set */ 2119 for (j=0; j<dcount; j++) mask[masked1[j]] = 0; 2120 for (j=0; j<odcount; j++) mask[masked2[j]] = 0; 2121 } 2122 2123 /* create our matrix */ 2124 ierr = MatCreate(comm,&A);CHKERRQ(ierr); 2125 ierr = MatSetSizes(A,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 2126 ierr = MatSetType(A,type);CHKERRQ(ierr); 2127 ierr = MatMPISBAIJSetPreallocation(A,bs,0,dlens,0,odlens);CHKERRQ(ierr); 2128 ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr); 2129 2130 if (!rank) { 2131 ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&buf);CHKERRQ(ierr); 2132 /* read in my part of the matrix numerical values */ 2133 nz = procsnz[0]; 2134 vals = buf; 2135 mycols = ibuf; 2136 if (size == 1) nz -= extra_rows; 2137 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2138 if (size == 1) for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; } 2139 2140 /* insert into matrix */ 2141 jj = rstart*bs; 2142 for (i=0; i<m; i++) { 2143 ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 2144 mycols += locrowlens[i]; 2145 vals += locrowlens[i]; 2146 jj++; 2147 } 2148 2149 /* read in other processors (except the last one) and ship out */ 2150 for (i=1; i<size-1; i++) { 2151 nz = procsnz[i]; 2152 vals = buf; 2153 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2154 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr); 2155 } 2156 /* the last proc */ 2157 if (size != 1){ 2158 nz = procsnz[i] - extra_rows; 2159 vals = buf; 2160 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2161 for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0; 2162 ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);CHKERRQ(ierr); 2163 } 2164 ierr = PetscFree(procsnz);CHKERRQ(ierr); 2165 2166 } else { 2167 /* receive numeric values */ 2168 ierr = PetscMalloc(nz*sizeof(PetscScalar),&buf);CHKERRQ(ierr); 2169 2170 /* receive message of values*/ 2171 vals = buf; 2172 mycols = ibuf; 2173 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr); 2174 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 2175 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2176 2177 /* insert into matrix */ 2178 jj = rstart*bs; 2179 for (i=0; i<m; i++) { 2180 ierr = MatSetValues_MPISBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 2181 mycols += locrowlens[i]; 2182 vals += locrowlens[i]; 2183 jj++; 2184 } 2185 } 2186 2187 ierr = PetscFree(locrowlens);CHKERRQ(ierr); 2188 ierr = PetscFree(buf);CHKERRQ(ierr); 2189 ierr = PetscFree(ibuf);CHKERRQ(ierr); 2190 ierr = PetscFree(rowners);CHKERRQ(ierr); 2191 ierr = PetscFree(dlens);CHKERRQ(ierr); 2192 ierr = PetscFree(mask);CHKERRQ(ierr); 2193 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2194 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2195 *newmat = A; 2196 PetscFunctionReturn(0); 2197 } 2198 2199 #undef __FUNCT__ 2200 #define __FUNCT__ "MatMPISBAIJSetHashTableFactor" 2201 /*XXXXX@ 2202 MatMPISBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable. 2203 2204 Input Parameters: 2205 . mat - the matrix 2206 . fact - factor 2207 2208 Collective on Mat 2209 2210 Level: advanced 2211 2212 Notes: 2213 This can also be set by the command line option: -mat_use_hash_table fact 2214 2215 .keywords: matrix, hashtable, factor, HT 2216 2217 .seealso: MatSetOption() 2218 @XXXXX*/ 2219 2220 2221 #undef __FUNCT__ 2222 #define __FUNCT__ "MatGetRowMax_MPISBAIJ" 2223 PetscErrorCode MatGetRowMax_MPISBAIJ(Mat A,Vec v) 2224 { 2225 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 2226 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(a->B)->data; 2227 PetscReal atmp; 2228 PetscReal *work,*svalues,*rvalues; 2229 PetscErrorCode ierr; 2230 PetscInt i,bs,mbs,*bi,*bj,brow,j,ncols,krow,kcol,col,row,Mbs,bcol; 2231 PetscMPIInt rank,size; 2232 PetscInt *rowners_bs,dest,count,source; 2233 PetscScalar *va; 2234 MatScalar *ba; 2235 MPI_Status stat; 2236 2237 PetscFunctionBegin; 2238 ierr = MatGetRowMax(a->A,v);CHKERRQ(ierr); 2239 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2240 2241 ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr); 2242 ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr); 2243 2244 bs = A->bs; 2245 mbs = a->mbs; 2246 Mbs = a->Mbs; 2247 ba = b->a; 2248 bi = b->i; 2249 bj = b->j; 2250 2251 /* find ownerships */ 2252 rowners_bs = a->rowners_bs; 2253 2254 /* each proc creates an array to be distributed */ 2255 ierr = PetscMalloc(bs*Mbs*sizeof(PetscReal),&work);CHKERRQ(ierr); 2256 ierr = PetscMemzero(work,bs*Mbs*sizeof(PetscReal));CHKERRQ(ierr); 2257 2258 /* row_max for B */ 2259 if (rank != size-1){ 2260 for (i=0; i<mbs; i++) { 2261 ncols = bi[1] - bi[0]; bi++; 2262 brow = bs*i; 2263 for (j=0; j<ncols; j++){ 2264 bcol = bs*(*bj); 2265 for (kcol=0; kcol<bs; kcol++){ 2266 col = bcol + kcol; /* local col index */ 2267 col += rowners_bs[rank+1]; /* global col index */ 2268 for (krow=0; krow<bs; krow++){ 2269 atmp = PetscAbsScalar(*ba); ba++; 2270 row = brow + krow; /* local row index */ 2271 if (PetscRealPart(va[row]) < atmp) va[row] = atmp; 2272 if (work[col] < atmp) work[col] = atmp; 2273 } 2274 } 2275 bj++; 2276 } 2277 } 2278 2279 /* send values to its owners */ 2280 for (dest=rank+1; dest<size; dest++){ 2281 svalues = work + rowners_bs[dest]; 2282 count = rowners_bs[dest+1]-rowners_bs[dest]; 2283 ierr = MPI_Send(svalues,count,MPIU_REAL,dest,rank,A->comm);CHKERRQ(ierr); 2284 } 2285 } 2286 2287 /* receive values */ 2288 if (rank){ 2289 rvalues = work; 2290 count = rowners_bs[rank+1]-rowners_bs[rank]; 2291 for (source=0; source<rank; source++){ 2292 ierr = MPI_Recv(rvalues,count,MPIU_REAL,MPI_ANY_SOURCE,MPI_ANY_TAG,A->comm,&stat);CHKERRQ(ierr); 2293 /* process values */ 2294 for (i=0; i<count; i++){ 2295 if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i]; 2296 } 2297 } 2298 } 2299 2300 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2301 ierr = PetscFree(work);CHKERRQ(ierr); 2302 PetscFunctionReturn(0); 2303 } 2304 2305 #undef __FUNCT__ 2306 #define __FUNCT__ "MatRelax_MPISBAIJ" 2307 PetscErrorCode MatRelax_MPISBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 2308 { 2309 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data; 2310 PetscErrorCode ierr; 2311 PetscInt mbs=mat->mbs,bs=matin->bs; 2312 PetscScalar *x,*b,*ptr,zero=0.0; 2313 Vec bb1; 2314 2315 PetscFunctionBegin; 2316 if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 2317 if (bs > 1) 2318 SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented"); 2319 2320 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 2321 if ( flag & SOR_ZERO_INITIAL_GUESS ) { 2322 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr); 2323 its--; 2324 } 2325 2326 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 2327 while (its--){ 2328 2329 /* lower triangular part: slvec0b = - B^T*xx */ 2330 ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr); 2331 2332 /* copy xx into slvec0a */ 2333 ierr = VecGetArray(mat->slvec0,&ptr);CHKERRQ(ierr); 2334 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 2335 ierr = PetscMemcpy(ptr,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 2336 ierr = VecRestoreArray(mat->slvec0,&ptr);CHKERRQ(ierr); 2337 2338 ierr = VecScale(mat->slvec0,-1.0);CHKERRQ(ierr); 2339 2340 /* copy bb into slvec1a */ 2341 ierr = VecGetArray(mat->slvec1,&ptr);CHKERRQ(ierr); 2342 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 2343 ierr = PetscMemcpy(ptr,b,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 2344 ierr = VecRestoreArray(mat->slvec1,&ptr);CHKERRQ(ierr); 2345 2346 /* set slvec1b = 0 */ 2347 ierr = VecSet(mat->slvec1b,zero);CHKERRQ(ierr); 2348 2349 ierr = VecScatterBegin(mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD,mat->sMvctx);CHKERRQ(ierr); 2350 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 2351 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 2352 ierr = VecScatterEnd(mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD,mat->sMvctx);CHKERRQ(ierr); 2353 2354 /* upper triangular part: bb1 = bb1 - B*x */ 2355 ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,bb1);CHKERRQ(ierr); 2356 2357 /* local diagonal sweep */ 2358 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr); 2359 } 2360 ierr = VecDestroy(bb1);CHKERRQ(ierr); 2361 } else { 2362 SETERRQ(PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format"); 2363 } 2364 PetscFunctionReturn(0); 2365 } 2366 2367 #undef __FUNCT__ 2368 #define __FUNCT__ "MatRelax_MPISBAIJ_2comm" 2369 PetscErrorCode MatRelax_MPISBAIJ_2comm(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 2370 { 2371 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data; 2372 PetscErrorCode ierr; 2373 Vec lvec1,bb1; 2374 2375 PetscFunctionBegin; 2376 if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 2377 if (matin->bs > 1) 2378 SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented"); 2379 2380 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 2381 if ( flag & SOR_ZERO_INITIAL_GUESS ) { 2382 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr); 2383 its--; 2384 } 2385 2386 ierr = VecDuplicate(mat->lvec,&lvec1);CHKERRQ(ierr); 2387 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 2388 while (its--){ 2389 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 2390 2391 /* lower diagonal part: bb1 = bb - B^T*xx */ 2392 ierr = (*mat->B->ops->multtranspose)(mat->B,xx,lvec1);CHKERRQ(ierr); 2393 ierr = VecScale(lvec1,-1.0);CHKERRQ(ierr); 2394 2395 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 2396 ierr = VecCopy(bb,bb1);CHKERRQ(ierr); 2397 ierr = VecScatterBegin(lvec1,bb1,ADD_VALUES,SCATTER_REVERSE,mat->Mvctx);CHKERRQ(ierr); 2398 2399 /* upper diagonal part: bb1 = bb1 - B*x */ 2400 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 2401 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr); 2402 2403 ierr = VecScatterEnd(lvec1,bb1,ADD_VALUES,SCATTER_REVERSE,mat->Mvctx);CHKERRQ(ierr); 2404 2405 /* diagonal sweep */ 2406 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr); 2407 } 2408 ierr = VecDestroy(lvec1);CHKERRQ(ierr); 2409 ierr = VecDestroy(bb1);CHKERRQ(ierr); 2410 } else { 2411 SETERRQ(PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format"); 2412 } 2413 PetscFunctionReturn(0); 2414 } 2415 2416