#define PETSCMAT_DLL

#include "src/mat/impls/baij/mpi/mpibaij.h"   /*I "petscmat.h" I*/
#include "mpisbaij.h"
#include "src/mat/impls/sbaij/seq/sbaij.h"

EXTERN PetscErrorCode MatSetUpMultiply_MPISBAIJ(Mat);
EXTERN PetscErrorCode MatSetUpMultiply_MPISBAIJ_2comm(Mat);
EXTERN PetscErrorCode DisAssemble_MPISBAIJ(Mat);
EXTERN PetscErrorCode MatIncreaseOverlap_MPISBAIJ(Mat,PetscInt,IS[],PetscInt);
EXTERN PetscErrorCode MatGetValues_SeqSBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],PetscScalar[]);
EXTERN PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],PetscScalar[]);
EXTERN PetscErrorCode MatSetValues_SeqSBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
EXTERN PetscErrorCode MatSetValuesBlocked_SeqSBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
EXTERN PetscErrorCode MatGetRow_SeqSBAIJ(Mat,PetscInt,PetscInt*,PetscInt**,PetscScalar**);
EXTERN PetscErrorCode MatRestoreRow_SeqSBAIJ(Mat,PetscInt,PetscInt*,PetscInt**,PetscScalar**);
EXTERN PetscErrorCode MatPrintHelp_SeqSBAIJ(Mat);
EXTERN PetscErrorCode MatZeroRows_SeqSBAIJ(Mat,IS,PetscScalar*);
EXTERN PetscErrorCode MatZeroRows_SeqBAIJ(Mat,IS,PetscScalar*);
EXTERN PetscErrorCode MatGetRowMax_MPISBAIJ(Mat,Vec);
EXTERN PetscErrorCode MatRelax_MPISBAIJ(Mat,Vec,PetscReal,MatSORType,PetscReal,PetscInt,PetscInt,Vec);

/* UGLY, ugly, ugly
   When MatScalar == PetscScalar the function MatSetValuesBlocked_MPISBAIJ_MatScalar() does
   not exist. Otherwise ..._MatScalar() takes matrix elements in single precision and
   inserts them into the single precision data structure. The function MatSetValuesBlocked_MPISBAIJ()
   converts the entries into single precision and then calls ..._MatScalar() to put them
   into the single precision data structures.
*/
#if defined(PETSC_USE_MAT_SINGLE)
EXTERN PetscErrorCode MatSetValuesBlocked_SeqSBAIJ_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode);
EXTERN PetscErrorCode MatSetValues_MPISBAIJ_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode);
EXTERN PetscErrorCode MatSetValuesBlocked_MPISBAIJ_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode);
EXTERN PetscErrorCode MatSetValues_MPISBAIJ_HT_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode);
EXTERN PetscErrorCode MatSetValuesBlocked_MPISBAIJ_HT_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode);
#else
#define MatSetValuesBlocked_SeqSBAIJ_MatScalar    MatSetValuesBlocked_SeqSBAIJ
#define MatSetValues_MPISBAIJ_MatScalar           MatSetValues_MPISBAIJ
#define MatSetValuesBlocked_MPISBAIJ_MatScalar    MatSetValuesBlocked_MPISBAIJ
#define MatSetValues_MPISBAIJ_HT_MatScalar        MatSetValues_MPISBAIJ_HT
#define MatSetValuesBlocked_MPISBAIJ_HT_MatScalar MatSetValuesBlocked_MPISBAIJ_HT
#endif

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatStoreValues_MPISBAIJ"
PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPISBAIJ(Mat mat)
{
  Mat_MPISBAIJ   *aij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
  ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatRetrieveValues_MPISBAIJ"
PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPISBAIJ(Mat mat)
{
  Mat_MPISBAIJ   *aij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
  ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

#define CHUNKSIZE 10

#define MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv) \
{ \
  brow = row/bs;  \
  rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
  rmax = aimax[brow]; nrow = ailen[brow]; \
  bcol = col/bs; \
  ridx = row % bs; cidx = col % bs; \
  low  = 0; high = nrow; \
  while (high-low > 3) { \
    t = (low+high)/2; \
    if (rp[t] > bcol) high = t; \
    else              low  = t; \
  } \
  for (_i=low; _i<high; _i++) { \
    if (rp[_i] > bcol) break; \
    if (rp[_i] == bcol) { \
      bap = ap + bs2*_i + bs*cidx + ridx; \
      if (addv == ADD_VALUES) *bap += value; \
      else                    *bap  = value; \
      goto a_noinsert; \
    } \
  } \
  if (a->nonew == 1) goto a_noinsert; \
  if (a->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
  MatSeqXAIJReallocateAIJ(a,bs2,nrow,brow,bcol,rmax,aa,ai,aj,a->mbs,rp,ap,aimax,a->nonew); \
  N = nrow++ - 1; \
  /* shift up all the later entries in this row */ \
  for (ii=N; ii>=_i; ii--) { \
    rp[ii+1] = rp[ii]; \
    ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
  } \
  if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); } \
  rp[_i]                      = bcol; \
  ap[bs2*_i + bs*cidx + ridx] = value; \
  a_noinsert:; \
  ailen[brow] = nrow; \
}
#ifndef MatSetValues_SeqBAIJ_B_Private
#define MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv) \
{ \
  brow = row/bs;  \
  rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
  rmax = bimax[brow]; nrow = bilen[brow]; \
  bcol = col/bs; \
  ridx = row % bs; cidx = col % bs; \
  low  = 0; high = nrow; \
  while (high-low > 3) { \
    t = (low+high)/2; \
    if (rp[t] > bcol) high = t; \
    else              low  = t; \
  } \
  for (_i=low; _i<high; _i++) { \
    if (rp[_i] > bcol) break; \
    if (rp[_i] == bcol) { \
      bap = ap + bs2*_i + bs*cidx + ridx; \
      if (addv == ADD_VALUES) *bap += value; \
      else                    *bap  = value; \
      goto b_noinsert; \
    } \
  } \
  if (b->nonew == 1) goto b_noinsert; \
  if (b->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
  MatSeqXAIJReallocateAIJ(b,bs2,nrow,brow,bcol,rmax,ba,bi,bj,b->mbs,rp,ap,bimax,b->nonew); \
  N = nrow++ - 1; \
  /* shift up all the later entries in this row */ \
  for (ii=N; ii>=_i; ii--) { \
    rp[ii+1] = rp[ii]; \
    ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
  } \
  if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); } \
  rp[_i]                      = bcol; \
  ap[bs2*_i + bs*cidx + ridx] = value; \
  b_noinsert:; \
  bilen[brow] = nrow; \
}
#endif

#if defined(PETSC_USE_MAT_SINGLE)
#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPISBAIJ"
PetscErrorCode MatSetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPISBAIJ   *b = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,N = m*n;
  MatScalar      *vsingle;

  PetscFunctionBegin;
  if (N > b->setvalueslen) {
    if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
    ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
    b->setvalueslen = N;
  }
  vsingle = b->setvaluescopy;

  for (i=0; i<N; i++) {
    vsingle[i] = v[i];
  }
  ierr = MatSetValues_MPISBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ"
PetscErrorCode MatSetValuesBlocked_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPISBAIJ   *b = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,N = m*n*b->bs2;
  MatScalar      *vsingle;

  PetscFunctionBegin;
  if (N > b->setvalueslen) {
    if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
    ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
    b->setvalueslen = N;
  }
  vsingle = b->setvaluescopy;
  for (i=0; i<N; i++) {
    vsingle[i] = v[i];
  }
  ierr = MatSetValuesBlocked_MPISBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif

/* Only add/insert a(i,j) with i<=j (blocks).
   Any a(i,j) with i>j input by the user is ignored.
*/
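
/*
   Illustration (an editorial sketch, not part of the original source): with bs=1,
   only entries at or above the diagonal are accepted by the routines below; a
   strictly lower triangular entry is silently dropped rather than raising an error.

     PetscInt    i = 3, j = 1;
     PetscScalar v = 5.0;
     ierr = MatSetValues(mat,1,&i,1,&j,&v,INSERT_VALUES);CHKERRQ(ierr);   <-- ignored, i > j
     ierr = MatSetValues(mat,1,&j,1,&i,&v,INSERT_VALUES);CHKERRQ(ierr);   <-- stored as a(1,3)
*/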
#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPISBAIJ_MatScalar"
PetscErrorCode MatSetValues_MPISBAIJ_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  MatScalar      value;
  PetscTruth     roworiented = baij->roworiented;
  PetscErrorCode ierr;
  PetscInt       i,j,row,col;
  PetscInt       rstart_orig=baij->rstart_bs;
  PetscInt       rend_orig=baij->rend_bs,cstart_orig=baij->cstart_bs;
  PetscInt       cend_orig=baij->cend_bs,bs=mat->bs;

  /* Some variables required in the macro */
  Mat          A = baij->A;
  Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)(A)->data;
  PetscInt     *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
  MatScalar    *aa=a->a;

  Mat         B = baij->B;
  Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(B)->data;
  PetscInt    *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
  MatScalar   *ba=b->a;

  PetscInt  *rp,ii,nrow,_i,rmax,N,brow,bcol;
  PetscInt  low,high,t,ridx,cidx,bs2=a->bs2;
  MatScalar *ap,*bap;

  /* for stash */
  PetscInt  n_loc, *in_loc=0;
  MatScalar *v_loc=0;

  PetscFunctionBegin;

  if (!baij->donotstash) {
    ierr = PetscMalloc(n*sizeof(PetscInt),&in_loc);CHKERRQ(ierr);
    ierr = PetscMalloc(n*sizeof(MatScalar),&v_loc);CHKERRQ(ierr);
  }

  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->M-1);
#endif
    if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */
      row = im[i] - rstart_orig;                     /* local row index */
      for (j=0; j<n; j++) {
        if (im[i]/bs > in[j]/bs) continue;           /* ignore lower triangular blocks */
        if (in[j] >= cstart_orig && in[j] < cend_orig) { /* diag entry (A) */
          col  = in[j] - cstart_orig;                /* local col index */
          brow = row/bs; bcol = col/bs;
          if (brow > bcol) continue;                 /* ignore lower triangular blocks of A */
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= mat->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->N-1);}
#endif
        else { /* off-diag entry (B) */
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
            col  = col - 1;
#else
            col = baij->colmap[in[j]/bs] - 1;
#endif
            if (col < 0 && !((Mat_SeqSBAIJ*)(baij->A->data))->nonew) {
              ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqSBAIJ_B_Private() */
              B = baij->B;
              b = (Mat_SeqBAIJ*)(B)->data;
              bimax=b->imax; bi=b->i; bilen=b->ilen; bj=b->j;
              ba = b->a;
            } else col += in[j]%bs;
          } else col = in[j];
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        }
      }
    } else { /* off processor entry */
      if (!baij->donotstash) {
        n_loc = 0;
        for (j=0; j<n; j++) {
          if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */
          in_loc[n_loc] = in[j];
          if (roworiented) {
            v_loc[n_loc] = v[i*n+j];
          } else {
            v_loc[n_loc] = v[j*m+i];
          }
          n_loc++;
        }
        ierr = MatStashValuesRow_Private(&mat->stash,im[i],n_loc,in_loc,v_loc);CHKERRQ(ierr);
      }
    }
  }

  if (!baij->donotstash) {
    ierr = PetscFree(in_loc);CHKERRQ(ierr);
    ierr = PetscFree(v_loc);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ_MatScalar"
PetscErrorCode MatSetValuesBlocked_MPISBAIJ_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
{
  Mat_MPISBAIJ    *baij = (Mat_MPISBAIJ*)mat->data;
  const MatScalar *value;
  MatScalar       *barray=baij->barray;
  PetscTruth      roworiented = baij->roworiented;
  PetscErrorCode  ierr;
  PetscInt        i,j,ii,jj,row,col,rstart=baij->rstart;
  PetscInt        rend=baij->rend,cstart=baij->cstart,stepval;
  PetscInt        cend=baij->cend,bs=mat->bs,bs2=baij->bs2;

  PetscFunctionBegin;
  if (!barray) {
    ierr = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr);
    baij->barray = barray;
  }

  if (roworiented) {
    stepval = (n-1)*bs;
  } else {
    stepval = (m-1)*bs;
  }
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row = im[i] - rstart;
      for (j=0; j<n; j++) {
        /* If NumCol = 1 then a copy is not required */
        if ((roworiented) && (n == 1)) {
          barray = (MatScalar*)v + i*bs2;
        } else if ((!roworiented) && (m == 1)) {
          barray = (MatScalar*)v + j*bs2;
        } else { /* Here a copy is required */
          if (roworiented) {
            value = v + i*(stepval+bs)*bs + j*bs;
          } else {
            value = v + j*(stepval+bs)*bs + i*bs;
          }
          for (ii=0; ii<bs; ii++,value+=stepval) {
            for (jj=0; jj<bs; jj++) {
              *barray++ = *value++;
            }
          }
          barray -= bs2;
        }

        if (in[j] >= cstart && in[j] < cend) {
          col  = in[j] - cstart;
          ierr = MatSetValuesBlocked_SeqSBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
        }
        else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);}
#endif
        else {
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }

#if defined(PETSC_USE_DEBUG)
#if defined (PETSC_USE_CTABLE)
            { PetscInt data;
              ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
              if ((data - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
            }
#else
            if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
#endif
#endif
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col  = (col - 1)/bs;
#else
            col = (baij->colmap[in[j]] - 1)/bs;
#endif
            if (col < 0 && !((Mat_SeqSBAIJ*)(baij->A->data))->nonew) {
              ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
            }
          }
          else col = in[j];
          ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
        }
      }
    } else {
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
        else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPISBAIJ"
PetscErrorCode MatGetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       bs=mat->bs,i,j,bsrstart = baij->rstart*bs,bsrend = baij->rend*bs;
  PetscInt       bscstart = baij->cstart*bs,bscend = baij->cend*bs,row,col,data;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);
    if (idxm[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->M-1);
    if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
      row = idxm[i] - bsrstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column %D",idxn[j]);
        if (idxn[j] >= mat->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->N-1);
        if (idxn[j] >= bscstart && idxn[j] < bscend) {
          col  = idxn[j] - bscstart;
          ierr = MatGetValues_SeqSBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!baij->colmap) {
            ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined (PETSC_USE_CTABLE)
          ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
          data--;
#else
          data = baij->colmap[idxn[j]/bs]-1;
#endif
          if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
          else {
            col  = data + idxn[j]%bs;
            ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else {
      SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatNorm_MPISBAIJ"
PetscErrorCode MatNorm_MPISBAIJ(Mat mat,NormType type,PetscReal *norm)
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscReal      sum[2],*lnorm2;

  PetscFunctionBegin;
  if (baij->size == 1) {
    ierr = MatNorm(baij->A,type,norm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      ierr = PetscMalloc(2*sizeof(PetscReal),&lnorm2);CHKERRQ(ierr);
      ierr = MatNorm(baij->A,type,lnorm2);CHKERRQ(ierr);
      *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2++;            /* square of norm(A) */
      ierr = MatNorm(baij->B,type,lnorm2);CHKERRQ(ierr);
      *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2--;            /* square of norm(B) */
      ierr = MPI_Allreduce(lnorm2,&sum,2,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
      *norm = sqrt(sum[0] + 2*sum[1]); /* entries of B appear twice in the symmetric matrix */
      ierr  = PetscFree(lnorm2);CHKERRQ(ierr);
    } else if (type == NORM_INFINITY || type == NORM_1) { /* max row/column sum */
      Mat_SeqSBAIJ *amat=(Mat_SeqSBAIJ*)baij->A->data;
      Mat_SeqBAIJ  *bmat=(Mat_SeqBAIJ*)baij->B->data;
      PetscReal    *rsum,*rsum2,vabs;
      PetscInt     *jj,*garray=baij->garray,rstart=baij->rstart,nz;
      PetscInt     brow,bcol,col,bs=baij->A->bs,row,grow,gcol,mbs=amat->mbs;
      MatScalar    *v;

      ierr  = PetscMalloc((2*mat->N+1)*sizeof(PetscReal),&rsum);CHKERRQ(ierr);
      rsum2 = rsum + mat->N;
      ierr  = PetscMemzero(rsum,mat->N*sizeof(PetscReal));CHKERRQ(ierr);
      /* Amat */
      v = amat->a; jj = amat->j;
      for (brow=0; brow<mbs; brow++) {
        grow = bs*(rstart + brow);
        nz   = amat->i[brow+1] - amat->i[brow];
        for (bcol=0; bcol<nz; bcol++) {
          gcol = bs*(rstart + *jj); jj++;
          for (col=0; col<bs; col++) {
            for (row=0; row<bs; row++) {
              vabs = PetscAbsScalar(*v); v++;
              rsum[gcol+col] += vabs;
              /* non-diagonal block */
              if (bcol > 0 && vabs > 0.0) rsum[grow+row] += vabs;
            }
          }
        }
      }
      /* Bmat */
      v = bmat->a; jj = bmat->j;
      for (brow=0; brow<mbs; brow++) {
        grow = bs*(rstart + brow);
        nz   = bmat->i[brow+1] - bmat->i[brow];
        for (bcol=0; bcol<nz; bcol++) {
          gcol = bs*garray[*jj]; jj++;
          for (col=0; col<bs; col++) {
            for (row=0; row<bs; row++) {
              vabs = PetscAbsScalar(*v); v++;
              rsum[gcol+col] += vabs;
              rsum[grow+row] += vabs;
            }
          }
        }
      }
      ierr  = MPI_Allreduce(rsum,rsum2,mat->N,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
      *norm = 0.0;
      for (col=0; col<mat->N; col++) {
        if (rsum2[col] > *norm) *norm = rsum2[col];
      }
      ierr = PetscFree(rsum);CHKERRQ(ierr);
    } else {
      SETERRQ(PETSC_ERR_SUP,"No support for this norm yet");
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPISBAIJ"
PetscErrorCode MatAssemblyBegin_MPISBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;
  InsertMode     addv;

  PetscFunctionBegin;
  if (baij->donotstash) {
    PetscFunctionReturn(0);
  }

  /* make sure all processors are either in INSERTMODE or ADDMODE */
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
  }
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(&mat->stash,baij->rowners_bs);CHKERRQ(ierr);
  ierr = MatStashScatterBegin_Private(&mat->bstash,baij->rowners);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscLogInfo((0,"MatAssemblyBegin_MPISBAIJ:Stash has %D entries, uses %D mallocs.\n",nstash,reallocs));CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscLogInfo((0,"MatAssemblyBegin_MPISBAIJ:Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPISBAIJ"
PetscErrorCode MatAssemblyEnd_MPISBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPISBAIJ   *baij=(Mat_MPISBAIJ*)mat->data;
  Mat_SeqSBAIJ   *a=(Mat_SeqSBAIJ*)baij->A->data;
  Mat_SeqBAIJ    *b=(Mat_SeqBAIJ*)baij->B->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
  PetscInt       *row,*col,other_disassembled;
  PetscMPIInt    n;
  PetscTruth     r1,r2,r3;
  MatScalar      *val;
  InsertMode     addv = mat->insertmode;

  PetscFunctionBegin;

  if (!baij->donotstash) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPISBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
    /* Now process the block-stash. Since the values are stashed column-oriented,
       set the roworiented flag to column-oriented, and after MatSetValues()
       restore the original flags */
    r1 = baij->roworiented;
    r2 = a->roworiented;
    r3 = b->roworiented;
    baij->roworiented = PETSC_FALSE;
    a->roworiented    = PETSC_FALSE;
    b->roworiented    = PETSC_FALSE;
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        ierr = MatSetValuesBlocked_MPISBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
    baij->roworiented = r1;
    a->roworiented    = r2;
    b->roworiented    = r3;
  }

  ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if the nonzero structure of submatrix B cannot change then we know that
     no processor disassembled, thus we can skip this stuff
  */
  if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
    }
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPISBAIJ(mat);CHKERRQ(ierr); /* setup Mvctx and sMvctx */
  }
  b->compressedrow.use = PETSC_TRUE;
  ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);

  if (baij->rowvalues) {
    ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);
    baij->rowvalues = 0;
  }

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPISBAIJ_ASCIIorDraworSocket"
static PetscErrorCode MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPISBAIJ      *baij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode    ierr;
  PetscInt          bs = mat->bs;
  PetscMPIInt       size = baij->size,rank = baij->rank;
  PetscTruth        iascii,isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo info;
      ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n",
             rank,mat->m,(PetscInt)info.nz_used*bs,(PetscInt)info.nz_allocated*bs,
             mat->bs,(PetscInt)info.memory);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    }
  }

  if (isdraw) {
    PetscDraw  draw;
    PetscTruth isnull;
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
  }

  if (size == 1) {
    ierr = PetscObjectSetName((PetscObject)baij->A,mat->name);CHKERRQ(ierr);
    ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto first processor. */
    Mat          A;
    Mat_SeqSBAIJ *Aloc;
    Mat_SeqBAIJ  *Bloc;
    PetscInt     M = mat->M,N = mat->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
    MatScalar    *a;

    /* Should this be the same type as mat? */
    ierr = MatCreate(mat->comm,&A);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
    } else {
      ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
    }
    ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
    ierr = MatMPISBAIJSetPreallocation(A,mat->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
    ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);

    /* copy over the A part */
    Aloc = (Mat_SeqSBAIJ*)baij->A->data;
    ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr);

    for (i=0; i<mbs; i++) {
      rvals[0] = bs*(baij->rstart + i);
      for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
      for (j=ai[i]; j<ai[i+1]; j++) {
        col = (baij->cstart+aj[j])*bs;
        for (k=0; k<bs; k++) {
          ierr = MatSetValues_MPISBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++; a += bs;
        }
      }
    }
    /* copy over the B part */
    Bloc = (Mat_SeqBAIJ*)baij->B->data;
    ai = Bloc->i; aj = Bloc->j; a = Bloc->a;
    for (i=0; i<mbs; i++) {
      rvals[0] = bs*(baij->rstart + i);
      for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
      for (j=ai[i]; j<ai[i+1]; j++) {
        col = baij->garray[aj[j]]*bs;
        for (k=0; k<bs; k++) {
          ierr = MatSetValues_MPISBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++; a += bs;
        }
      }
    }
    ierr = PetscFree(rvals);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = PetscObjectSetName((PetscObject)((Mat_MPISBAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr);
      ierr = MatView(((Mat_MPISBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
    ierr = MatDestroy(A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPISBAIJ"
PetscErrorCode MatView_MPISBAIJ(Mat mat,PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscTruth     iascii,isdraw,issocket,isbinary;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
  if (iascii || isdraw || issocket || isbinary) {
    ierr = MatView_MPISBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  } else {
    SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPISBAIJ matrices",((PetscObject)viewer)->type_name);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPISBAIJ"
PetscErrorCode MatDestroy_MPISBAIJ(Mat mat)
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->M,mat->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
  ierr = PetscFree(baij->rowners);CHKERRQ(ierr);
  ierr = MatDestroy(baij->A);CHKERRQ(ierr);
  ierr = MatDestroy(baij->B);CHKERRQ(ierr);
#if defined (PETSC_USE_CTABLE)
  if (baij->colmap) {ierr = PetscTableDelete(baij->colmap);CHKERRQ(ierr);}
#else
  if (baij->colmap) {ierr = PetscFree(baij->colmap);CHKERRQ(ierr);}
#endif
  if (baij->garray) {ierr = PetscFree(baij->garray);CHKERRQ(ierr);}
  if (baij->lvec)   {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);}
  if (baij->Mvctx)  {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);}
  if (baij->slvec0) {
    ierr = VecDestroy(baij->slvec0);CHKERRQ(ierr);
    ierr = VecDestroy(baij->slvec0b);CHKERRQ(ierr);
  }
  if (baij->slvec1) {
    ierr = VecDestroy(baij->slvec1);CHKERRQ(ierr);
    ierr = VecDestroy(baij->slvec1a);CHKERRQ(ierr);
    ierr = VecDestroy(baij->slvec1b);CHKERRQ(ierr);
  }
  if (baij->sMvctx)    {ierr = VecScatterDestroy(baij->sMvctx);CHKERRQ(ierr);}
  if (baij->rowvalues) {ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);}
  if (baij->barray)    {ierr = PetscFree(baij->barray);CHKERRQ(ierr);}
  if (baij->hd)        {ierr = PetscFree(baij->hd);CHKERRQ(ierr);}
#if defined(PETSC_USE_MAT_SINGLE)
  if (baij->setvaluescopy) {ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr);}
#endif
  ierr = PetscFree(baij);CHKERRQ(ierr);

  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPISBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
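
/*
   Note on the multiply kernels below (describing the code as written): they
   compute y = (A + B + B^T)x while storing only the upper triangle. Roughly:
     slvec1a <- A*x       (local symmetric diagonal block)
     slvec0b <- B^T*x     (contributions owed to rows owned by other processes)
     slvec0 = [x ; B^T*x] is scattered into slvec1 = [A*x ; 0] with ADD_VALUES,
     which delivers the B^T*x pieces to their owning processes and gathers the
     ghost values of x into slvec1b
     y <- B*slvec1b + slvec1a
*/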
#undef __FUNCT__
#define __FUNCT__ "MatMult_MPISBAIJ"
PetscErrorCode MatMult_MPISBAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt,mbs=a->mbs,bs=A->bs;
  PetscScalar    *x,*from,zero=0.0;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->n) {
    SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
  }
  ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
  if (nt != A->m) {
    SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and yy");
  }

  /* diagonal part */
  ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr);
  ierr = VecSet(a->slvec1b,zero);CHKERRQ(ierr);

  /* subdiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);

  /* copy x into the vec slvec0 */
  ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
  ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
  ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
  ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);

  ierr = VecScatterBegin(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr);
  ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr);

  /* superdiagonal part */
  ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMult_MPISBAIJ_2comm"
PetscErrorCode MatMult_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->n) {
    SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
  }
  ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
  if (nt != A->m) {
    SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and yy");
  }

  ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  /* do diagonal part */
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  /* do superdiagonal part */
  ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  /* do subdiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPISBAIJ"
PetscErrorCode MatMultAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       mbs=a->mbs,bs=A->bs;
  PetscScalar    *x,*from,zero=0.0;

  PetscFunctionBegin;
  /*
  PetscSynchronizedPrintf(A->comm," MatMultAdd is called ...\n");
  PetscSynchronizedFlush(A->comm);
  */
  /* diagonal part */
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,a->slvec1a);CHKERRQ(ierr);
  ierr = VecSet(a->slvec1b,zero);CHKERRQ(ierr);

  /* subdiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);

  /* copy x into the vec slvec0 */
  ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
  ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
  ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
  ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);

  ierr = VecScatterBegin(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr);
  ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr);

  /* superdiagonal part */
  ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,zz);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

"MatMultAdd_MPISBAIJ_2comm" 962 PetscErrorCode MatMultAdd_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy,Vec zz) 963 { 964 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 965 PetscErrorCode ierr; 966 967 PetscFunctionBegin; 968 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 969 /* do diagonal part */ 970 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 971 /* do supperdiagonal part */ 972 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 973 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 974 975 /* do subdiagonal part */ 976 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 977 ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 978 ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 979 980 PetscFunctionReturn(0); 981 } 982 983 #undef __FUNCT__ 984 #define __FUNCT__ "MatMultTranspose_MPISBAIJ" 985 PetscErrorCode MatMultTranspose_MPISBAIJ(Mat A,Vec xx,Vec yy) 986 { 987 PetscErrorCode ierr; 988 989 PetscFunctionBegin; 990 ierr = MatMult(A,xx,yy);CHKERRQ(ierr); 991 PetscFunctionReturn(0); 992 } 993 994 #undef __FUNCT__ 995 #define __FUNCT__ "MatMultTransposeAdd_MPISBAIJ" 996 PetscErrorCode MatMultTransposeAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 997 { 998 PetscErrorCode ierr; 999 1000 PetscFunctionBegin; 1001 ierr = MatMultAdd(A,xx,yy,zz);CHKERRQ(ierr); 1002 PetscFunctionReturn(0); 1003 } 1004 1005 /* 1006 This only works correctly for square matrices where the subblock A->A is the 1007 diagonal block 1008 */ 1009 #undef __FUNCT__ 1010 #define __FUNCT__ "MatGetDiagonal_MPISBAIJ" 1011 PetscErrorCode MatGetDiagonal_MPISBAIJ(Mat A,Vec v) 1012 { 1013 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1014 PetscErrorCode ierr; 1015 1016 PetscFunctionBegin; 1017 /* if (a->M != a->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */ 1018 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 1019 PetscFunctionReturn(0); 1020 } 1021 1022 #undef __FUNCT__ 1023 #define __FUNCT__ "MatScale_MPISBAIJ" 1024 PetscErrorCode MatScale_MPISBAIJ(Mat A,PetscScalar aa) 1025 { 1026 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1027 PetscErrorCode ierr; 1028 1029 PetscFunctionBegin; 1030 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 1031 ierr = MatScale(a->B,aa);CHKERRQ(ierr); 1032 PetscFunctionReturn(0); 1033 } 1034 1035 #undef __FUNCT__ 1036 #define __FUNCT__ "MatGetRow_MPISBAIJ" 1037 PetscErrorCode MatGetRow_MPISBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1038 { 1039 PetscFunctionBegin; 1040 if (matin) SETERRQ(PETSC_ERR_SUP,"MatGetRow is not supported for SBAIJ matrix format"); 1041 PetscFunctionReturn(0); 1042 } 1043 1044 #undef __FUNCT__ 1045 #define __FUNCT__ "MatRestoreRow_MPISBAIJ" 1046 PetscErrorCode MatRestoreRow_MPISBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1047 { 1048 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 1049 1050 PetscFunctionBegin; 1051 if (!baij->getrowactive) { 1052 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1053 } 1054 baij->getrowactive = PETSC_FALSE; 1055 PetscFunctionReturn(0); 1056 } 1057 1058 #undef __FUNCT__ 1059 #define __FUNCT__ "MatZeroEntries_MPISBAIJ" 1060 PetscErrorCode MatZeroEntries_MPISBAIJ(Mat A) 1061 { 1062 Mat_MPISBAIJ *l = (Mat_MPISBAIJ*)A->data; 1063 PetscErrorCode ierr; 1064 1065 PetscFunctionBegin; 1066 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 1067 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 1068 
#undef __FUNCT__
#define __FUNCT__ "MatGetInfo_MPISBAIJ"
PetscErrorCode MatGetInfo_MPISBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)matin->data;
  Mat            A = a->A,B = a->B;
  PetscErrorCode ierr;
  PetscReal      isend[5],irecv[5];

  PetscFunctionBegin;
  info->block_size = (PetscReal)matin->bs;
  ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;
  ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
  isend[3] += info->memory;  isend[4] += info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else {
    SETERRQ1(PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
  }
  info->rows_global       = (PetscReal)A->M;
  info->columns_global    = (PetscReal)A->N;
  info->rows_local        = (PetscReal)A->m;
  info->columns_local     = (PetscReal)A->n;
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetOption_MPISBAIJ"
PetscErrorCode MatSetOption_MPISBAIJ(Mat A,MatOption op)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NO_NEW_NONZERO_LOCATIONS:
  case MAT_YES_NEW_NONZERO_LOCATIONS:
  case MAT_COLUMNS_UNSORTED:
  case MAT_COLUMNS_SORTED:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
  case MAT_KEEP_ZEROED_ROWS:
  case MAT_NEW_NONZERO_LOCATION_ERR:
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
    break;
  case MAT_ROW_ORIENTED:
    a->roworiented = PETSC_TRUE;
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
    break;
  case MAT_ROWS_SORTED:
  case MAT_ROWS_UNSORTED:
  case MAT_YES_NEW_DIAGONALS:
    ierr = PetscLogInfo((A,"Info:MatSetOption_MPISBAIJ:Option ignored\n"));CHKERRQ(ierr);
    break;
  case MAT_COLUMN_ORIENTED:
    a->roworiented = PETSC_FALSE;
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = PETSC_TRUE;
    break;
  case MAT_NO_NEW_DIAGONALS:
    SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
  case MAT_USE_HASH_TABLE:
    a->ht_flag = PETSC_TRUE;
    break;
  case MAT_NOT_SYMMETRIC:
  case MAT_NOT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
SETERRQ(PETSC_ERR_SUP,"Matrix must be symmetric"); 1167 case MAT_SYMMETRIC: 1168 case MAT_STRUCTURALLY_SYMMETRIC: 1169 case MAT_NOT_HERMITIAN: 1170 case MAT_SYMMETRY_ETERNAL: 1171 case MAT_NOT_SYMMETRY_ETERNAL: 1172 break; 1173 default: 1174 SETERRQ(PETSC_ERR_SUP,"unknown option"); 1175 } 1176 PetscFunctionReturn(0); 1177 } 1178 1179 #undef __FUNCT__ 1180 #define __FUNCT__ "MatTranspose_MPISBAIJ" 1181 PetscErrorCode MatTranspose_MPISBAIJ(Mat A,Mat *B) 1182 { 1183 PetscErrorCode ierr; 1184 PetscFunctionBegin; 1185 ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr); 1186 PetscFunctionReturn(0); 1187 } 1188 1189 #undef __FUNCT__ 1190 #define __FUNCT__ "MatDiagonalScale_MPISBAIJ" 1191 PetscErrorCode MatDiagonalScale_MPISBAIJ(Mat mat,Vec ll,Vec rr) 1192 { 1193 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 1194 Mat a=baij->A, b=baij->B; 1195 PetscErrorCode ierr; 1196 PetscInt nv,m,n; 1197 PetscTruth flg; 1198 1199 PetscFunctionBegin; 1200 if (ll != rr){ 1201 ierr = VecEqual(ll,rr,&flg);CHKERRQ(ierr); 1202 if (!flg) 1203 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be same\n"); 1204 } 1205 if (!ll) PetscFunctionReturn(0); 1206 1207 ierr = MatGetLocalSize(mat,&m,&n);CHKERRQ(ierr); 1208 if (m != n) SETERRQ2(PETSC_ERR_ARG_SIZ,"For symmetric format, local size %d %d must be same",m,n); 1209 1210 ierr = VecGetLocalSize(rr,&nv);CHKERRQ(ierr); 1211 if (nv!=n) SETERRQ(PETSC_ERR_ARG_SIZ,"Left and right vector non-conforming local size"); 1212 1213 ierr = VecScatterBegin(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr); 1214 1215 /* left diagonalscale the off-diagonal part */ 1216 ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr); 1217 1218 /* scale the diagonal part */ 1219 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1220 1221 /* right diagonalscale the off-diagonal part */ 1222 ierr = VecScatterEnd(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr); 1223 ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr); 1224 PetscFunctionReturn(0); 1225 } 1226 1227 #undef __FUNCT__ 1228 #define __FUNCT__ "MatPrintHelp_MPISBAIJ" 1229 PetscErrorCode MatPrintHelp_MPISBAIJ(Mat A) 1230 { 1231 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1232 MPI_Comm comm = A->comm; 1233 static PetscTruth called = PETSC_FALSE; 1234 PetscErrorCode ierr; 1235 1236 PetscFunctionBegin; 1237 if (!a->rank) { 1238 ierr = MatPrintHelp_SeqSBAIJ(a->A);CHKERRQ(ierr); 1239 } 1240 if (called) {PetscFunctionReturn(0);} else called = PETSC_TRUE; 1241 ierr = (*PetscHelpPrintf)(comm," Options for MATMPISBAIJ matrix format (the defaults):\n");CHKERRQ(ierr); 1242 ierr = (*PetscHelpPrintf)(comm," -mat_use_hash_table <factor>: Use hashtable for efficient matrix assembly\n");CHKERRQ(ierr); 1243 PetscFunctionReturn(0); 1244 } 1245 1246 #undef __FUNCT__ 1247 #define __FUNCT__ "MatSetUnfactored_MPISBAIJ" 1248 PetscErrorCode MatSetUnfactored_MPISBAIJ(Mat A) 1249 { 1250 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1251 PetscErrorCode ierr; 1252 1253 PetscFunctionBegin; 1254 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1255 PetscFunctionReturn(0); 1256 } 1257 1258 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat,MatDuplicateOption,Mat *); 1259 1260 #undef __FUNCT__ 1261 #define __FUNCT__ "MatEqual_MPISBAIJ" 1262 PetscErrorCode MatEqual_MPISBAIJ(Mat A,Mat B,PetscTruth *flag) 1263 { 1264 Mat_MPISBAIJ *matB = (Mat_MPISBAIJ*)B->data,*matA = (Mat_MPISBAIJ*)A->data; 1265 Mat a,b,c,d; 1266 PetscTruth flg; 1267 PetscErrorCode ierr; 1268 1269 
  PetscFunctionBegin;
  a = matA->A; b = matA->B;
  c = matB->A; d = matB->B;

  ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
  if (flg) {
    ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
  }
  ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetUpPreallocation_MPISBAIJ"
PetscErrorCode MatSetUpPreallocation_MPISBAIJ(Mat A)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatMPISBAIJSetPreallocation(A,1,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetSubMatrices_MPISBAIJ"
PetscErrorCode MatGetSubMatrices_MPISBAIJ(Mat A,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat *B[])
{
  PetscErrorCode ierr;
  PetscInt       i;
  PetscTruth     flg;

  PetscFunctionBegin;
  for (i=0; i<n; i++) {
    ierr = ISEqual(irow[i],icol[i],&flg);CHKERRQ(ierr);
    if (!flg) {
      SETERRQ(PETSC_ERR_SUP,"Can only get symmetric submatrix for MPISBAIJ matrices");
    }
  }
  ierr = MatGetSubMatrices_MPIBAIJ(A,n,irow,icol,scall,B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* -------------------------------------------------------------------*/
static struct _MatOps MatOps_Values = {
       MatSetValues_MPISBAIJ,
       MatGetRow_MPISBAIJ,
       MatRestoreRow_MPISBAIJ,
       MatMult_MPISBAIJ,
/* 4*/ MatMultAdd_MPISBAIJ,
       MatMultTranspose_MPISBAIJ,
       MatMultTransposeAdd_MPISBAIJ,
       0,
       0,
       0,
/*10*/ 0,
       0,
       0,
       MatRelax_MPISBAIJ,
       MatTranspose_MPISBAIJ,
/*15*/ MatGetInfo_MPISBAIJ,
       MatEqual_MPISBAIJ,
       MatGetDiagonal_MPISBAIJ,
       MatDiagonalScale_MPISBAIJ,
       MatNorm_MPISBAIJ,
/*20*/ MatAssemblyBegin_MPISBAIJ,
       MatAssemblyEnd_MPISBAIJ,
       0,
       MatSetOption_MPISBAIJ,
       MatZeroEntries_MPISBAIJ,
/*25*/ 0,
       0,
       0,
       0,
       0,
/*30*/ MatSetUpPreallocation_MPISBAIJ,
       0,
       0,
       0,
       0,
/*35*/ MatDuplicate_MPISBAIJ,
       0,
       0,
       0,
       0,
/*40*/ 0,
       MatGetSubMatrices_MPISBAIJ,
       MatIncreaseOverlap_MPISBAIJ,
       MatGetValues_MPISBAIJ,
       0,
/*45*/ MatPrintHelp_MPISBAIJ,
       MatScale_MPISBAIJ,
       0,
       0,
       0,
/*50*/ 0,
       0,
       0,
       0,
       0,
/*55*/ 0,
       0,
       MatSetUnfactored_MPISBAIJ,
       0,
       MatSetValuesBlocked_MPISBAIJ,
/*60*/ 0,
       0,
       0,
       MatGetPetscMaps_Petsc,
       0,
/*65*/ 0,
       0,
       0,
       0,
       0,
/*70*/ MatGetRowMax_MPISBAIJ,
       0,
       0,
       0,
       0,
/*75*/ 0,
       0,
       0,
       0,
       0,
/*80*/ 0,
       0,
       0,
       0,
       MatLoad_MPISBAIJ,
/*85*/ 0,
       0,
       0,
       0,
       0,
/*90*/ 0,
       0,
       0,
       0,
       0,
/*95*/ 0,
       0,
       0,
       0};

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonalBlock_MPISBAIJ"
PetscErrorCode PETSCMAT_DLLEXPORT MatGetDiagonalBlock_MPISBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a)
{
  PetscFunctionBegin;
  *a      = ((Mat_MPISBAIJ*)A->data)->A;
  *iscopy = PETSC_FALSE;
  PetscFunctionReturn(0);
}
EXTERN_C_END

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatMPISBAIJSetPreallocation_MPISBAIJ"
PetscErrorCode PETSCMAT_DLLEXPORT MatMPISBAIJSetPreallocation_MPISBAIJ(Mat B,PetscInt bs,PetscInt d_nz,PetscInt *d_nnz,PetscInt o_nz,PetscInt *o_nnz)
{
  Mat_MPISBAIJ   *b;
  PetscErrorCode ierr;
  PetscInt       i,mbs,Mbs;

  PetscFunctionBegin;
  ierr = PetscOptionsGetInt(B->prefix,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);

  if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive");
  if (d_nz == PETSC_DECIDE || d_nz == PETSC_DEFAULT) d_nz = 3;
  if (o_nz == PETSC_DECIDE || o_nz == PETSC_DEFAULT) o_nz = 1;
  if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
  if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
  if (d_nnz) {
    for (i=0; i<B->m/bs; i++) {
      if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
    }
  }
  if (o_nnz) {
    for (i=0; i<B->m/bs; i++) {
      if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
    }
  }
  B->preallocated = PETSC_TRUE;
  ierr = PetscSplitOwnershipBlock(B->comm,bs,&B->m,&B->M);CHKERRQ(ierr);
  ierr = PetscSplitOwnershipBlock(B->comm,bs,&B->n,&B->N);CHKERRQ(ierr);
  ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr);
  ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->cmap);CHKERRQ(ierr);

  b   = (Mat_MPISBAIJ*)B->data;
  mbs = B->m/bs;
  Mbs = B->M/bs;
  if (mbs*bs != B->m) {
    SETERRQ2(PETSC_ERR_ARG_SIZ,"Number of local rows %D must be divisible by blocksize %D",B->m,bs);
  }

  B->bs  = bs;
  b->bs2 = bs*bs;
  b->mbs = mbs;
  b->nbs = mbs;
  b->Mbs = Mbs;
  b->Nbs = Mbs;

  ierr = MPI_Allgather(&b->mbs,1,MPIU_INT,b->rowners+1,1,MPIU_INT,B->comm);CHKERRQ(ierr);
  b->rowners[0] = 0;
  for (i=2; i<=b->size; i++) {
    b->rowners[i] += b->rowners[i-1];
  }
  b->rstart = b->rowners[b->rank];
  b->rend   = b->rowners[b->rank+1];
  b->cstart = b->rstart;
  b->cend   = b->rend;
  for (i=0; i<=b->size; i++) {
    b->rowners_bs[i] = b->rowners[i]*bs;
  }
  b->rstart_bs = b->rstart*bs;
  b->rend_bs   = b->rend*bs;
  b->cstart_bs = b->cstart*bs;
  b->cend_bs   = b->cend*bs;

  ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
  ierr = MatSetSizes(b->A,B->m,B->m,B->m,B->m);CHKERRQ(ierr);
  ierr = MatSetType(b->A,MATSEQSBAIJ);CHKERRQ(ierr);
  ierr = MatSeqSBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);

  ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
  ierr = MatSetSizes(b->B,B->m,B->M,B->m,B->M);CHKERRQ(ierr);
  ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr);
  ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);

  /* build cache for off array entries formed */
  ierr = MatStashCreate_Private(B->comm,bs,&B->bstash);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
EXTERN_C_END

/*MC
   MATMPISBAIJ - MATMPISBAIJ = "mpisbaij" - A matrix type to be used for distributed symmetric sparse block matrices,
   based on block compressed sparse row format. Only the upper triangular portion of the matrix is stored.

   Options Database Keys:
. -mat_type mpisbaij - sets the matrix type to "mpisbaij" during a call to MatSetFromOptions()

   Level: beginner

.seealso: MatCreateMPISBAIJ
M*/
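
/*
   Example usage (an editorial sketch, not part of the original source; the block
   size and preallocation counts below are illustrative only):

     Mat A;
     ierr = MatCreate(comm,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,m,m,M,M);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
     ierr = MatMPISBAIJSetPreallocation(A,2,3,PETSC_NULL,1,PETSC_NULL);CHKERRQ(ierr);
     ... set upper triangular entries with MatSetValues()/MatSetValuesBlocked() ...
     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/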

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatCreate_MPISBAIJ"
PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPISBAIJ(Mat B)
{
  Mat_MPISBAIJ   *b;
  PetscErrorCode ierr;
  PetscTruth     flg;

  PetscFunctionBegin;

  ierr    = PetscNew(Mat_MPISBAIJ,&b);CHKERRQ(ierr);
  B->data = (void*)b;
  ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);

  B->ops->destroy = MatDestroy_MPISBAIJ;
  B->ops->view    = MatView_MPISBAIJ;
  B->mapping      = 0;
  B->factor       = 0;
  B->assembled    = PETSC_FALSE;

  B->insertmode = NOT_SET_VALUES;
  ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(B->comm,&b->size);CHKERRQ(ierr);

  /* build local table of row and column ownerships */
  ierr = PetscMalloc(3*(b->size+2)*sizeof(PetscInt),&b->rowners);CHKERRQ(ierr);
  b->cowners    = b->rowners + b->size + 2;
  b->rowners_bs = b->cowners + b->size + 2;
  ierr = PetscLogObjectMemory(B,3*(b->size+2)*sizeof(PetscInt)+sizeof(struct _p_Mat)+sizeof(Mat_MPISBAIJ));CHKERRQ(ierr);

  /* build cache for off array entries formed */
  ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr);
  b->donotstash  = PETSC_FALSE;
  b->colmap      = PETSC_NULL;
  b->garray      = PETSC_NULL;
  b->roworiented = PETSC_TRUE;

#if defined(PETSC_USE_MAT_SINGLE)
  /* stuff for MatSetValues_XXX in single precision */
  b->setvalueslen  = 0;
  b->setvaluescopy = PETSC_NULL;
#endif

  /* stuff used in block assembly */
  b->barray = 0;

  /* stuff used for matrix vector multiply */
  b->lvec    = 0;
  b->Mvctx   = 0;
  b->slvec0  = 0;
  b->slvec0b = 0;
  b->slvec1  = 0;
  b->slvec1a = 0;
  b->slvec1b = 0;
  b->sMvctx  = 0;

  /* stuff for MatGetRow() */
  b->rowindices   = 0;
  b->rowvalues    = 0;
  b->getrowactive = PETSC_FALSE;

  /* hash table stuff */
  b->ht           = 0;
  b->hd           = 0;
  b->ht_size      = 0;
  b->ht_flag      = PETSC_FALSE;
  b->ht_fact      = 0;
  b->ht_total_ct  = 0;
  b->ht_insert_ct = 0;

  ierr = PetscOptionsHasName(B->prefix,"-mat_use_hash_table",&flg);CHKERRQ(ierr);
  if (flg) {
    PetscReal fact = 1.39;
    ierr = MatSetOption(B,MAT_USE_HASH_TABLE);CHKERRQ(ierr);
    ierr = PetscOptionsGetReal(B->prefix,"-mat_use_hash_table",&fact,PETSC_NULL);CHKERRQ(ierr);
    if (fact <= 1.0) fact = 1.39;
    ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
    ierr = PetscLogInfo((0,"MatCreateMPISBAIJ:Hash table Factor used %5.2f\n",fact));CHKERRQ(ierr);
  }
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
                                           "MatStoreValues_MPISBAIJ",
                                           MatStoreValues_MPISBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
                                           "MatRetrieveValues_MPISBAIJ",
                                           MatRetrieveValues_MPISBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
                                           "MatGetDiagonalBlock_MPISBAIJ",
                                           MatGetDiagonalBlock_MPISBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPISBAIJSetPreallocation_C",
                                           "MatMPISBAIJSetPreallocation_MPISBAIJ",
                                           MatMPISBAIJSetPreallocation_MPISBAIJ);CHKERRQ(ierr);
  B->symmetric                  = PETSC_TRUE;

/*MC
   MATSBAIJ - MATSBAIJ = "sbaij" - A matrix type to be used for symmetric block sparse matrices.

   This matrix type is identical to MATSEQSBAIJ when constructed with a single process communicator,
   and MATMPISBAIJ otherwise.

   Options Database Keys:
. -mat_type sbaij - sets the matrix type to "sbaij" during a call to MatSetFromOptions()

   Level: beginner

.seealso: MatCreateMPISBAIJ,MATSEQSBAIJ,MATMPISBAIJ
M*/

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatCreate_SBAIJ"
PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_SBAIJ(Mat A)
{
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = PetscObjectChangeTypeName((PetscObject)A,MATSBAIJ);CHKERRQ(ierr);
  ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
  if (size == 1) {
    ierr = MatSetType(A,MATSEQSBAIJ);CHKERRQ(ierr);
  } else {
    ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatMPISBAIJSetPreallocation"
/*@C
   MatMPISBAIJSetPreallocation - For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective on Mat

   Input Parameters:
+  A - the matrix
.  bs - size of block
.  d_nz - number of block nonzeros per block row in diagonal portion of local
          submatrix (same for all local rows)
.  d_nnz - array containing the number of block nonzeros in the various block rows
           in the upper triangular and diagonal part of the diagonal portion of the
           local submatrix (possibly different for each block row), or PETSC_NULL.
           You must leave room for the diagonal entry even if it is zero.
.  o_nz - number of block nonzeros per block row in the off-diagonal portion of local
          submatrix (same for all local rows).
-  o_nnz - array containing the number of nonzeros in the various block rows of the
           off-diagonal portion of the local submatrix (possibly different for
           each block row), or PETSC_NULL.

   Options Database Keys:
+  -mat_no_unroll - uses code that does not unroll the loops in the
                    block calculations (much slower)
-  -mat_block_size - size of the blocks to use

   Notes:
   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
   then it must be used on all processors that share the object for that argument.

   If the *_nnz parameter is given then the *_nz parameter is ignored.

   Storage Information:
   For a square global matrix we define each processor's diagonal portion
   to be its local rows and the corresponding columns (a square submatrix);
   each processor's off-diagonal portion encompasses the remainder of the
   local matrix (a rectangular submatrix).

   The user can specify preallocated storage for the diagonal part of
   the local submatrix with either d_nz or d_nnz (not both).  Set
   d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
   memory allocation.  Likewise, specify preallocated storage for the
   off-diagonal part of the local submatrix with o_nz or o_nnz (not both).

   Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
   the figure below we depict these three local rows and all columns (0-11).

.vb
            0 1 2 3 4 5 6 7 8 9 10 11
           -------------------
   row 3  |  o o o d d d o o o o o o
   row 4  |  o o o d d d o o o o o o
   row 5  |  o o o d d d o o o o o o
           -------------------
.ve

   Thus, any entries in the d locations are stored in the d (diagonal)
   submatrix, and any entries in the o locations are stored in the
   o (off-diagonal) submatrix.  Note that the d matrix is stored in
   MATSEQSBAIJ format and the o submatrix in MATSEQBAIJ format.

   Now d_nz should indicate the number of block nonzeros per row in the upper triangular
   plus the diagonal part of the d matrix,
   and o_nz should indicate the number of block nonzeros per row in the o matrix.
   In general, for PDE problems in which most nonzeros are near the diagonal,
   one expects d_nz >> o_nz.  For large problems you MUST preallocate memory
   or you will get TERRIBLE performance; see the users' manual chapter on
   matrices.

   Level: intermediate

.keywords: matrix, block, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPIBAIJ()
@*/
PetscErrorCode PETSCMAT_DLLEXPORT MatMPISBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
{
  PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]);

  PetscFunctionBegin;
  ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPISBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
  if (f) {
    ierr = (*f)(B,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
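
/*
   Editor's note (not part of the original source): a sketch of per-row
   preallocation for the three local rows depicted in the figure above,
   assuming bs=1 and that every stored location shown is nonzero.  Since
   only the upper triangle is kept, row 3 counts diagonal columns 3-5,
   row 4 counts columns 4-5, and row 5 counts only itself; each row also
   has the six off-diagonal columns 6-11 (columns 0-2 lie in the lower
   triangle and are implied by symmetry rather than stored).

.vb
   PetscInt d_nnz[3] = {3,2,1};
   PetscInt o_nnz[3] = {6,6,6};
   ierr = MatMPISBAIJSetPreallocation(B,1,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
.ve
*/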

#undef __FUNCT__
#define __FUNCT__ "MatCreateMPISBAIJ"
/*@C
   MatCreateMPISBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format
   (block compressed row).  For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  bs - size of block
.  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
       This value should be the same as the local size used in creating the
       y vector for the matrix-vector product y = Ax.
.  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
       This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax.
.  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.  d_nz - number of block nonzeros per block row in diagonal portion of local
          submatrix (same for all local rows)
.  d_nnz - array containing the number of block nonzeros in the various block rows
           in the upper triangular portion of the diagonal portion of the local
           submatrix (possibly different for each block row), or PETSC_NULL.
           You must leave room for the diagonal entry even if it is zero.
.  o_nz - number of block nonzeros per block row in the off-diagonal portion of local
          submatrix (same for all local rows).
-  o_nnz - array containing the number of nonzeros in the various block rows of the
           off-diagonal portion of the local submatrix (possibly different for
           each block row), or PETSC_NULL.

   Output Parameter:
.  A - the matrix

   Options Database Keys:
+  -mat_no_unroll - uses code that does not unroll the loops in the
                    block calculations (much slower)
.  -mat_block_size - size of the blocks to use
-  -mat_mpi - use the parallel matrix data structures even on one processor
              (defaults to using SeqBAIJ format on one processor)

   Notes:
   The number of rows and columns must be divisible by blocksize.

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
   then it must be used on all processors that share the object for that argument.

   If the *_nnz parameter is given then the *_nz parameter is ignored.

   Storage Information:
   For a square global matrix we define each processor's diagonal portion
   to be its local rows and the corresponding columns (a square submatrix);
   each processor's off-diagonal portion encompasses the remainder of the
   local matrix (a rectangular submatrix).

   The user can specify preallocated storage for the diagonal part of
   the local submatrix with either d_nz or d_nnz (not both).  Set
   d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
   memory allocation.  Likewise, specify preallocated storage for the
   off-diagonal part of the local submatrix with o_nz or o_nnz (not both).

   Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
   the figure below we depict these three local rows and all columns (0-11).

.vb
            0 1 2 3 4 5 6 7 8 9 10 11
           -------------------
   row 3  |  o o o d d d o o o o o o
   row 4  |  o o o d d d o o o o o o
   row 5  |  o o o d d d o o o o o o
           -------------------
.ve

   Thus, any entries in the d locations are stored in the d (diagonal)
   submatrix, and any entries in the o locations are stored in the
   o (off-diagonal) submatrix.  Note that the d matrix is stored in
   MATSEQSBAIJ format and the o submatrix in MATSEQBAIJ format.

   Now d_nz should indicate the number of block nonzeros per row in the upper triangular
   plus the diagonal part of the d matrix,
   and o_nz should indicate the number of block nonzeros per row in the o matrix.
   In general, for PDE problems in which most nonzeros are near the diagonal,
   one expects d_nz >> o_nz.  For large problems you MUST preallocate memory
   or you will get TERRIBLE performance; see the users' manual chapter on
   matrices.

   Level: intermediate

.keywords: matrix, block, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPIBAIJ()
@*/
PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPISBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
{
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MatCreate(comm,A);CHKERRQ(ierr);
  ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size > 1) {
    ierr = MatSetType(*A,MATMPISBAIJ);CHKERRQ(ierr);
    ierr = MatMPISBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
  } else {
    ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr);
    ierr = MatSeqSBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
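
/*
   Editor's note (not part of the original source): a minimal usage sketch.
   The block size 2, global size 100, and fill estimates are illustrative
   assumptions only.

.vb
   Mat A;
   ierr = MatCreateMPISBAIJ(PETSC_COMM_WORLD,2,PETSC_DECIDE,PETSC_DECIDE,100,100,
                            5,PETSC_NULL,2,PETSC_NULL,&A);CHKERRQ(ierr);
.ve
*/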

#undef __FUNCT__
#define __FUNCT__ "MatDuplicate_MPISBAIJ"
static PetscErrorCode MatDuplicate_MPISBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
{
  Mat            mat;
  Mat_MPISBAIJ   *a,*oldmat = (Mat_MPISBAIJ*)matin->data;
  PetscErrorCode ierr;
  PetscInt       len=0,nt,bs=matin->bs,mbs=oldmat->mbs;
  PetscScalar    *array;

  PetscFunctionBegin;
  *newmat = 0;
  ierr = MatCreate(matin->comm,&mat);CHKERRQ(ierr);
  ierr = MatSetSizes(mat,matin->m,matin->n,matin->M,matin->N);CHKERRQ(ierr);
  ierr = MatSetType(mat,matin->type_name);CHKERRQ(ierr);
  ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);

  mat->factor       = matin->factor;
  mat->preallocated = PETSC_TRUE;
  mat->assembled    = PETSC_TRUE;
  mat->insertmode   = NOT_SET_VALUES;

  a       = (Mat_MPISBAIJ*)mat->data;
  mat->bs = matin->bs;
  a->bs2  = oldmat->bs2;
  a->mbs  = oldmat->mbs;
  a->nbs  = oldmat->nbs;
  a->Mbs  = oldmat->Mbs;
  a->Nbs  = oldmat->Nbs;

  a->rstart       = oldmat->rstart;
  a->rend         = oldmat->rend;
  a->cstart       = oldmat->cstart;
  a->cend         = oldmat->cend;
  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  a->donotstash   = oldmat->donotstash;
  a->roworiented  = oldmat->roworiented;
  a->rowindices   = 0;
  a->rowvalues    = 0;
  a->getrowactive = PETSC_FALSE;
  a->barray       = 0;
  a->rstart_bs    = oldmat->rstart_bs;
  a->rend_bs      = oldmat->rend_bs;
  a->cstart_bs    = oldmat->cstart_bs;
  a->cend_bs      = oldmat->cend_bs;

  /* hash table stuff */
  a->ht           = 0;
  a->hd           = 0;
  a->ht_size      = 0;
  a->ht_flag      = oldmat->ht_flag;
  a->ht_fact      = oldmat->ht_fact;
  a->ht_total_ct  = 0;
  a->ht_insert_ct = 0;

  ierr = PetscMemcpy(a->rowners,oldmat->rowners,3*(a->size+2)*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr);
  ierr = MatStashCreate_Private(matin->comm,matin->bs,&mat->bstash);CHKERRQ(ierr);
  if (oldmat->colmap) {
#if defined (PETSC_USE_CTABLE)
    ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
#else
    ierr = PetscMalloc((a->Nbs)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
    ierr = PetscLogObjectMemory(mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
    ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
#endif
  } else a->colmap = 0;

  if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
    ierr = PetscMalloc(len*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
    ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
    ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr);
  } else a->garray = 0;

  ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);

  ierr = VecDuplicate(oldmat->slvec0,&a->slvec0);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->slvec0);CHKERRQ(ierr);
  ierr = VecDuplicate(oldmat->slvec1,&a->slvec1);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->slvec1);CHKERRQ(ierr);

  ierr = VecGetLocalSize(a->slvec1,&nt);CHKERRQ(ierr);
  ierr = VecGetArray(a->slvec1,&array);CHKERRQ(ierr);
  ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,bs*mbs,array,&a->slvec1a);CHKERRQ(ierr);
  ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,nt-bs*mbs,array+bs*mbs,&a->slvec1b);CHKERRQ(ierr);
  ierr = VecRestoreArray(a->slvec1,&array);CHKERRQ(ierr);
  ierr = VecGetArray(a->slvec0,&array);CHKERRQ(ierr);
  ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,nt-bs*mbs,array+bs*mbs,&a->slvec0b);CHKERRQ(ierr);
  ierr = VecRestoreArray(a->slvec0,&array);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->slvec0b);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->slvec1a);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->slvec1b);CHKERRQ(ierr);

  /* ierr = VecScatterCopy(oldmat->sMvctx,&a->sMvctx); - not written yet, replaced by the lazy trick: */
  ierr = PetscObjectReference((PetscObject)oldmat->sMvctx);CHKERRQ(ierr);
  a->sMvctx = oldmat->sMvctx;
  ierr = PetscLogObjectParent(mat,a->sMvctx);CHKERRQ(ierr);

  ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
  ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
  ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr);
  *newmat = mat;
  PetscFunctionReturn(0);
}

#include "petscsys.h"

#undef __FUNCT__
#define __FUNCT__ "MatLoad_MPISBAIJ"
PetscErrorCode MatLoad_MPISBAIJ(PetscViewer viewer,MatType type,Mat *newmat)
{
  Mat            A;
  PetscErrorCode ierr;
  PetscInt       i,nz,j,rstart,rend;
  PetscScalar    *vals,*buf;
  MPI_Comm       comm = ((PetscObject)viewer)->comm;
  MPI_Status     status;
  PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,*sndcounts = 0,*browners,maxnz,*rowners;
  PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
  PetscInt       *locrowlens,*procsnz = 0,jj,*mycols,*ibuf;
  PetscInt       bs=1,Mbs,mbs,extra_rows;
  PetscInt       *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount;
  PetscInt       dcount,kmax,k,nzcount,tmp;
  int            fd;

  PetscFunctionBegin;
  ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);

  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
    if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Not a matrix object");
    if (header[3] < 0) {
      SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as MPISBAIJ");
    }
  }

  ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
  M = header[1]; N = header[2];

  if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");

  /*
     This code adds extra rows to make sure the number of rows is
     divisible by the blocksize
  */
  Mbs        = M/bs;
  extra_rows = bs - M + bs*Mbs;     /* i.e. bs - M % bs */
  if (extra_rows == bs) extra_rows = 0;
  else                  Mbs++;
  if (extra_rows && !rank) {
    ierr = PetscLogInfo((0,"MatLoad_MPISBAIJ:Padding loaded matrix to match blocksize\n"));CHKERRQ(ierr);
  }

  /* determine ownership of all rows */
  mbs = Mbs/size + ((Mbs % size) > rank);
  m   = mbs*bs;
  ierr = PetscMalloc(2*(size+2)*sizeof(PetscMPIInt),&rowners);CHKERRQ(ierr);
  browners = rowners + size + 1;
  ierr = MPI_Allgather(&mbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  rowners[0] = 0;
  for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
  for (i=0; i<=size; i++) browners[i] = rowners[i]*bs;
  rstart = rowners[rank];
  rend   = rowners[rank+1];

  /* distribute row lengths to all processors */
  ierr = PetscMalloc((rend-rstart)*bs*sizeof(PetscInt),&locrowlens);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
    for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; /* each padding row gets a single (diagonal) entry */
    ierr = PetscMalloc(size*sizeof(PetscMPIInt),&sndcounts);CHKERRQ(ierr);
    for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i];
    ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr);
    ierr = PetscFree(sndcounts);CHKERRQ(ierr);
  } else {
    ierr = MPI_Scatterv(0,0,0,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr);
  }

  if (!rank) {   /* procs[0] */
    /* calculate the number of nonzeros on each processor */
    ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
    ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
    for (i=0; i<size; i++) {
      for (j=rowners[i]*bs; j<rowners[i+1]*bs; j++) {
        procsnz[i] += rowlengths[j];
      }
    }
    ierr = PetscFree(rowlengths);CHKERRQ(ierr);

    /* determine max buffer needed and allocate it */
    maxnz = 0;
    for (i=0; i<size; i++) {
      maxnz = PetscMax(maxnz,procsnz[i]);
    }
    ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);

    /* read in my part of the matrix column indices */
    nz     = procsnz[0];
    ierr   = PetscMalloc(nz*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
    mycols = ibuf;
    if (size == 1) nz -= extra_rows;
    ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
    if (size == 1) for (i=0; i<extra_rows; i++) { mycols[nz+i] = M+i; }

    /* read in everyone else's part (except the last proc's) and ship it off */
    for (i=1; i<size-1; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
      ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
    }
    /* read in the stuff for the last proc */
    if (size != 1) {
      nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
      ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
      for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
      ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(cols);CHKERRQ(ierr);
  } else {  /* procs[i], i>0 */
    /* determine buffer space needed for message */
    nz = 0;
    for (i=0; i<m; i++) {
      nz += locrowlens[i];
    }
    ierr   = PetscMalloc(nz*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
    mycols = ibuf;
    /* receive message of column indices */
    ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Something is wrong with the file");
  }

  /* loop over local rows, determining number of off-diagonal entries */
  ierr    = PetscMalloc(2*(rend-rstart+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
  odlens  = dlens + (rend-rstart);
  ierr    = PetscMalloc(3*Mbs*sizeof(PetscInt),&mask);CHKERRQ(ierr);
  ierr    = PetscMemzero(mask,3*Mbs*sizeof(PetscInt));CHKERRQ(ierr);
  masked1 = mask    + Mbs;
  masked2 = masked1 + Mbs;
  rowcount = 0; nzcount = 0;
  for (i=0; i<mbs; i++) {
    dcount  = 0;
    odcount = 0;
    for (j=0; j<bs; j++) {
      kmax = locrowlens[rowcount];
      for (k=0; k<kmax; k++) {
        tmp = mycols[nzcount++]/bs; /* block col. index */
        if (!mask[tmp]) {
          mask[tmp] = 1;
          if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; /* entry in off-diag portion */
          else masked1[dcount++] = tmp;                              /* entry in diag portion */
        }
      }
      rowcount++;
    }

    dlens[i]  = dcount;  /* d_nnz[i] */
    odlens[i] = odcount; /* o_nnz[i] */

    /* zero out the mask elements we set */
    for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
    for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
  }

  /* create our matrix */
  ierr = MatCreate(comm,&A);CHKERRQ(ierr);
  ierr = MatSetSizes(A,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
  ierr = MatSetType(A,type);CHKERRQ(ierr);
  ierr = MatMPISBAIJSetPreallocation(A,bs,0,dlens,0,odlens);CHKERRQ(ierr);
  ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr);

  if (!rank) {
    ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
    /* read in my part of the matrix numerical values */
    nz     = procsnz[0];
    vals   = buf;
    mycols = ibuf;
    if (size == 1) nz -= extra_rows;
    ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
    if (size == 1) for (i=0; i<extra_rows; i++) { vals[nz+i] = 1.0; }

    /* insert into matrix */
    jj = rstart*bs;
    for (i=0; i<m; i++) {
      ierr    = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
      mycols += locrowlens[i];
      vals   += locrowlens[i];
      jj++;
    }

    /* read in the values for the other processors (except the last one) and ship them off */
    for (i=1; i<size-1; i++) {
      nz   = procsnz[i];
      vals = buf;
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
    }
    /* the last proc */
    if (size != 1) {
      nz   = procsnz[size-1] - extra_rows;
      vals = buf;
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0; /* unit diagonal for the padding rows */
      ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(procsnz);CHKERRQ(ierr);
  } else {
    /* receive numeric values */
    ierr = PetscMalloc(nz*sizeof(PetscScalar),&buf);CHKERRQ(ierr);

    /* receive message of values */
    vals   = buf;
    mycols = ibuf;
    ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Something is wrong with the file");

    /* insert into matrix */
    jj = rstart*bs;
    for (i=0; i<m; i++) {
      ierr    = MatSetValues_MPISBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
      mycols += locrowlens[i];
      vals   += locrowlens[i];
      jj++;
    }
  }

  ierr = PetscFree(locrowlens);CHKERRQ(ierr);
  ierr = PetscFree(buf);CHKERRQ(ierr);
  ierr = PetscFree(ibuf);CHKERRQ(ierr);
  ierr = PetscFree(rowners);CHKERRQ(ierr);
  ierr = PetscFree(dlens);CHKERRQ(ierr);
  ierr = PetscFree(mask);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  *newmat = A;
  PetscFunctionReturn(0);
}
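
/*
   Editor's note (not part of the original source): a sketch of driving this
   routine through a binary viewer.  The file name is a placeholder, and the
   FILE_MODE_READ open mode is assumed from the PETSc version this file
   appears to target.

.vb
   Mat         A;
   PetscViewer fd;
   ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"matrix.dat",FILE_MODE_READ,&fd);CHKERRQ(ierr);
   ierr = MatLoad(fd,MATMPISBAIJ,&A);CHKERRQ(ierr);
   ierr = PetscViewerDestroy(fd);CHKERRQ(ierr);
.ve

   Run with -matload_block_size <bs> to control the block size used here; rows
   are padded (with unit diagonal entries) when the global size is not
   divisible by the block size.
*/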

#undef __FUNCT__
#define __FUNCT__ "MatMPISBAIJSetHashTableFactor"
/*XXXXX@
   MatMPISBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.

   Input Parameters:
+  mat  - the matrix
-  fact - factor

   Collective on Mat

   Level: advanced

   Notes:
   This can also be set by the command line option: -mat_use_hash_table fact

.keywords: matrix, hashtable, factor, HT

.seealso: MatSetOption()
@XXXXX*/

#undef __FUNCT__
#define __FUNCT__ "MatGetRowMax_MPISBAIJ"
PetscErrorCode MatGetRowMax_MPISBAIJ(Mat A,Vec v)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  Mat_SeqBAIJ    *b = (Mat_SeqBAIJ*)(a->B)->data;
  PetscReal      atmp;
  PetscReal      *work,*svalues,*rvalues;
  PetscErrorCode ierr;
  PetscInt       i,bs,mbs,*bi,*bj,brow,j,ncols,krow,kcol,col,row,Mbs,bcol;
  PetscMPIInt    rank,size;
  PetscInt       *rowners_bs,dest,count,source;
  PetscScalar    *va;
  MatScalar      *ba;
  MPI_Status     stat;

  PetscFunctionBegin;
  ierr = MatGetRowMax(a->A,v);CHKERRQ(ierr);
  ierr = VecGetArray(v,&va);CHKERRQ(ierr);

  ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr);

  bs  = A->bs;
  mbs = a->mbs;
  Mbs = a->Mbs;
  ba  = b->a;
  bi  = b->i;
  bj  = b->j;

  /* find ownerships */
  rowners_bs = a->rowners_bs;

  /* each proc creates an array to be distributed */
  ierr = PetscMalloc(bs*Mbs*sizeof(PetscReal),&work);CHKERRQ(ierr);
  ierr = PetscMemzero(work,bs*Mbs*sizeof(PetscReal));CHKERRQ(ierr);

  /* row_max for B */
  if (rank != size-1) {
    for (i=0; i<mbs; i++) {
      ncols = bi[1] - bi[0]; bi++;
      brow  = bs*i;
      for (j=0; j<ncols; j++) {
        bcol = bs*(*bj);
        for (kcol=0; kcol<bs; kcol++) {
          col  = bcol + kcol;          /* local col index */
          col += rowners_bs[rank+1];   /* global col index */
          for (krow=0; krow<bs; krow++) {
            atmp = PetscAbsScalar(*ba); ba++;
            row  = brow + krow;        /* local row index */
            if (PetscRealPart(va[row]) < atmp) va[row] = atmp;
            if (work[col] < atmp) work[col] = atmp;
          }
        }
        bj++;
      }
    }

    /* send values to their owners */
    for (dest=rank+1; dest<size; dest++) {
      svalues = work + rowners_bs[dest];
      count   = rowners_bs[dest+1] - rowners_bs[dest];
      ierr    = MPI_Send(svalues,count,MPIU_REAL,dest,rank,A->comm);CHKERRQ(ierr);
    }
  }

  /* receive values */
  if (rank) {
    rvalues = work;
    count   = rowners_bs[rank+1] - rowners_bs[rank];
    for (source=0; source<rank; source++) {
      ierr = MPI_Recv(rvalues,count,MPIU_REAL,MPI_ANY_SOURCE,MPI_ANY_TAG,A->comm,&stat);CHKERRQ(ierr);
      /* process values */
      for (i=0; i<count; i++) {
        if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i];
      }
    }
  }

  ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
  ierr = PetscFree(work);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRelax_MPISBAIJ"
PetscErrorCode MatRelax_MPISBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
{
  Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
  PetscErrorCode ierr;
  PetscInt       mbs=mat->mbs,bs=matin->bs;
  PetscScalar    *x,*b,*ptr,zero=0.0;
  Vec            bb1;

  PetscFunctionBegin;
  if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
  if (bs > 1) SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");

  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
      its--;
    }

    ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
    while (its--) {
      /* lower triangular part: slvec0b = - B^T*xx */
      ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr);

      /* copy xx into slvec0a */
      ierr = VecGetArray(mat->slvec0,&ptr);CHKERRQ(ierr);
      ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
      ierr = PetscMemcpy(ptr,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
      ierr = VecRestoreArray(mat->slvec0,&ptr);CHKERRQ(ierr);

      ierr = VecScale(mat->slvec0,-1.0);CHKERRQ(ierr);

      /* copy bb into slvec1a */
      ierr = VecGetArray(mat->slvec1,&ptr);CHKERRQ(ierr);
      ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
      ierr = PetscMemcpy(ptr,b,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
      ierr = VecRestoreArray(mat->slvec1,&ptr);CHKERRQ(ierr);

      /* set slvec1b = 0 */
      ierr = VecSet(mat->slvec1b,zero);CHKERRQ(ierr);

      ierr = VecScatterBegin(mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD,mat->sMvctx);CHKERRQ(ierr);
      ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
      ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
      ierr = VecScatterEnd(mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD,mat->sMvctx);CHKERRQ(ierr);

      /* upper triangular part: bb1 = bb1 - B*x */
      ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,bb1);CHKERRQ(ierr);

      /* local diagonal sweep */
      ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
    }
    ierr = VecDestroy(bb1);CHKERRQ(ierr);
  } else {
    SETERRQ(PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRelax_MPISBAIJ_2comm"
PetscErrorCode MatRelax_MPISBAIJ_2comm(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
{
  Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
  PetscErrorCode ierr;
  Vec            lvec1,bb1;

  PetscFunctionBegin;
  if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
  if (matin->bs > 1) SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");

  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
      its--;
    }

    ierr = VecDuplicate(mat->lvec,&lvec1);CHKERRQ(ierr);
    ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
    while (its--) {
      ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);

      /* lower triangular part: bb1 = bb - B^T*xx */
      ierr = (*mat->B->ops->multtranspose)(mat->B,xx,lvec1);CHKERRQ(ierr);
      ierr = VecScale(lvec1,-1.0);CHKERRQ(ierr);

      ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
      ierr = VecCopy(bb,bb1);CHKERRQ(ierr);
      ierr = VecScatterBegin(lvec1,bb1,ADD_VALUES,SCATTER_REVERSE,mat->Mvctx);CHKERRQ(ierr);

      /* upper triangular part: bb1 = bb1 - B*x */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);

      ierr = VecScatterEnd(lvec1,bb1,ADD_VALUES,SCATTER_REVERSE,mat->Mvctx);CHKERRQ(ierr);

      /* diagonal sweep */
      ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
    }
    ierr = VecDestroy(lvec1);CHKERRQ(ierr);
    ierr = VecDestroy(bb1);CHKERRQ(ierr);
  } else {
    SETERRQ(PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
  }
  PetscFunctionReturn(0);
}